914 files changed, 20269 insertions, 6187 deletions
diff --git a/Documentation/ABI/stable/sysfs-class-bluetooth b/Documentation/ABI/stable/sysfs-class-bluetooth new file mode 100644 index 000000000000..36be02471174 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-class-bluetooth @@ -0,0 +1,9 @@ +What: /sys/class/bluetooth/hci<index>/reset +Date: 14-Jan-2025 +KernelVersion: 6.13 +Contact: linux-bluetooth@vger.kernel.org +Description: This write-only attribute allows users to trigger the vendor reset + method on the Bluetooth device when arbitrary data is written. + The reset may or may not be done through the device transport + (e.g., UART/USB), and can also be done through an out-of-band + approach such as GPIO. diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 955fbcd19ce9..f273ea15a8e8 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -293,3 +293,13 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED`: Misaligned vector accesses are not supported at all and will generate a misaligned address fault. + +* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0`: A bitmask containing the + thead vendor extensions that are compatible with the + :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior. + + * T-HEAD + + * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR`: The xtheadvector vendor + extension is supported in the T-Head ISA extensions spec starting from + commit a18c801634 ("Add T-Head VECTOR vendor extension. "). diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index 51665a3e6a50..1e0e7358e14a 100644 --- a/Documentation/block/ublk.rst +++ b/Documentation/block/ublk.rst @@ -333,6 +333,4 @@ References .. [#userspace_readme] https://github.com/ming1/ubdsrv/blob/master/README -.. [#stefan] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/ - .. [#xiaoguang] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/ diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml index 371317554863..0db2cbd7891f 100644 --- a/Documentation/devicetree/bindings/arm/fsl.yaml +++ b/Documentation/devicetree/bindings/arm/fsl.yaml @@ -1091,6 +1091,7 @@ properties: - dmo,imx8mp-data-modul-edm-sbc # i.MX8MP eDM SBC - emcraft,imx8mp-navqp # i.MX8MP Emcraft Systems NavQ+ Kit - fsl,imx8mp-evk # i.MX8MP EVK Board + - fsl,imx8mp-evk-revb4 # i.MX8MP EVK Rev B4 Board - gateworks,imx8mp-gw71xx-2x # i.MX8MP Gateworks Board - gateworks,imx8mp-gw72xx-2x # i.MX8MP Gateworks Board - gateworks,imx8mp-gw73xx-2x # i.MX8MP Gateworks Board @@ -1271,6 +1272,7 @@ properties: items: - enum: - fsl,imx8qm-mek # i.MX8QM MEK Board + - fsl,imx8qm-mek-revd # i.MX8QM MEK Rev D Board - toradex,apalis-imx8 # Apalis iMX8 Modules - toradex,apalis-imx8-v1.1 # Apalis iMX8 V1.1 Modules - const: fsl,imx8qm @@ -1299,6 +1301,7 @@ properties: - enum: - einfochips,imx8qxp-ai_ml # i.MX8QXP AI_ML Board - fsl,imx8qxp-mek # i.MX8QXP MEK Board + - fsl,imx8qxp-mek-wcpu # i.MX8QXP MEK WCPU Board - const: fsl,imx8qxp - description: i.MX8DXL based Boards diff --git a/Documentation/devicetree/bindings/dma/adi,axi-dmac.txt b/Documentation/devicetree/bindings/dma/adi,axi-dmac.txt deleted file mode 100644 index cd17684aaab5..000000000000 --- a/Documentation/devicetree/bindings/dma/adi,axi-dmac.txt +++ /dev/null @@ -1,61 +0,0 @@ -Analog Devices AXI-DMAC DMA controller - -Required properties: - - compatible: Must be "adi,axi-dmac-1.00.a". 
- - reg: Specification for the controllers memory mapped register map. - - interrupts: Specification for the controllers interrupt. - - clocks: Phandle and specifier to the controllers AXI interface clock - - #dma-cells: Must be 1. - -Required sub-nodes: - - adi,channels: This sub-node must contain a sub-node for each DMA channel. For - the channel sub-nodes the following bindings apply. They must match the - configuration options of the peripheral as it was instantiated. - -Required properties for adi,channels sub-node: - - #size-cells: Must be 0 - - #address-cells: Must be 1 - -Required channel sub-node properties: - - reg: Which channel this node refers to. - - adi,source-bus-width, - adi,destination-bus-width: Width of the source or destination bus in bits. - - adi,source-bus-type, - adi,destination-bus-type: Type of the source or destination bus. Must be one - of the following: - 0 (AXI_DMAC_TYPE_AXI_MM): Memory mapped AXI interface - 1 (AXI_DMAC_TYPE_AXI_STREAM): Streaming AXI interface - 2 (AXI_DMAC_TYPE_AXI_FIFO): FIFO interface - -Deprecated optional channel properties: - - adi,length-width: Width of the DMA transfer length register. - - adi,cyclic: Must be set if the channel supports hardware cyclic DMA - transfers. - - adi,2d: Must be set if the channel supports hardware 2D DMA transfers. - -DMA clients connected to the AXI-DMAC DMA controller must use the format -described in the dma.txt file using a one-cell specifier. The value of the -specifier refers to the DMA channel index. - -Example: - -dma: dma@7c420000 { - compatible = "adi,axi-dmac-1.00.a"; - reg = <0x7c420000 0x10000>; - interrupts = <0 57 0>; - clocks = <&clkc 16>; - #dma-cells = <1>; - - adi,channels { - #size-cells = <0>; - #address-cells = <1>; - - dma-channel@0 { - reg = <0>; - adi,source-bus-width = <32>; - adi,source-bus-type = <ADI_AXI_DMAC_TYPE_MM_AXI>; - adi,destination-bus-width = <64>; - adi,destination-bus-type = <ADI_AXI_DMAC_TYPE_FIFO>; - }; - }; -}; diff --git a/Documentation/devicetree/bindings/dma/adi,axi-dmac.yaml b/Documentation/devicetree/bindings/dma/adi,axi-dmac.yaml new file mode 100644 index 000000000000..63b6fb0423c2 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/adi,axi-dmac.yaml @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/adi,axi-dmac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices AXI-DMAC DMA controller + +description: | + FPGA-based DMA controller designed for use with high-speed converter hardware. + + http://analogdevicesinc.github.io/hdl/library/axi_dmac/index.html + +maintainers: + - Nuno Sa <nuno.sa@analog.com> + +additionalProperties: false + +properties: + compatible: + const: adi,axi-dmac-1.00.a + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + "#dma-cells": + const: 1 + + adi,channels: + deprecated: true + type: object + description: + This sub-node must contain a sub-node for each DMA channel. This node is + only required for IP versions older than 4.3.a and should otherwise be + omitted. + additionalProperties: false + + properties: + "#size-cells": + const: 0 + "#address-cells": + const: 1 + + patternProperties: + "^dma-channel@[0-9a-f]+$": + type: object + description: + DMA channel properties based on HDL compile-time configuration. 
+ additionalProperties: false + + properties: + reg: + maxItems: 1 + + adi,source-bus-width: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Width of the source bus in bits. + enum: [8, 16, 32, 64, 128] + + adi,destination-bus-width: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Width of the destination bus in bits. + enum: [8, 16, 32, 64, 128] + + adi,source-bus-type: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + Type of the source bus. + + 0: Memory mapped AXI interface + 1: Streaming AXI interface + 2: FIFO interface + enum: [0, 1, 2] + + adi,destination-bus-type: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Type of the destination bus (see adi,source-bus-type). + enum: [0, 1, 2] + + adi,length-width: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: Width of the DMA transfer length register. + + adi,cyclic: + deprecated: true + type: boolean + description: + Must be set if the channel supports hardware cyclic DMA transfers. + + adi,2d: + deprecated: true + type: boolean + description: + Must be set if the channel supports hardware 2D DMA transfers. + + required: + - reg + - adi,source-bus-width + - adi,destination-bus-width + - adi,source-bus-type + - adi,destination-bus-type + + required: + - "#size-cells" + - "#address-cells" + +required: + - compatible + - reg + - interrupts + - clocks + - "#dma-cells" + +examples: + - | + dma-controller@7c420000 { + compatible = "adi,axi-dmac-1.00.a"; + reg = <0x7c420000 0x10000>; + interrupts = <0 57 0>; + clocks = <&clkc 16>; + #dma-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml index 02d5bd035409..9b5180c0a7c4 100644 --- a/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml +++ b/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml @@ -22,7 +22,9 @@ properties: number. compatible: - const: allwinner,sun4i-a10-dma + enum: + - allwinner,sun4i-a10-dma + - allwinner,suniv-f1c100s-dma reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/dma/atmel,sama5d4-dma.yaml b/Documentation/devicetree/bindings/dma/atmel,sama5d4-dma.yaml new file mode 100644 index 000000000000..9ca1c5d1f00f --- /dev/null +++ b/Documentation/devicetree/bindings/dma/atmel,sama5d4-dma.yaml @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/atmel,sama5d4-dma.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip AT91 Extensible Direct Memory Access Controller + +maintainers: + - Nicolas Ferre <nicolas.ferre@microchip.com> + - Charan Pedumuru <charan.pedumuru@microchip.com> + +description: + The DMA Controller (XDMAC) is a AHB-protocol central direct memory access + controller. It performs peripheral data transfer and memory move operations + over one or two bus ports through the unidirectional communication + channel. Each channel is fully programmable and provides both peripheral + or memory-to-memory transfers. The channel features are configurable at + implementation. 
+ +allOf: + - $ref: dma-controller.yaml# + +properties: + compatible: + oneOf: + - enum: + - atmel,sama5d4-dma + - microchip,sama7g5-dma + - items: + - enum: + - microchip,sam9x60-dma + - microchip,sam9x7-dma + - const: atmel,sama5d4-dma + + "#dma-cells": + description: | + Represents the number of integer cells in the `dmas` property of client + devices. The single cell specifies the channel configuration register: + - bit 13: SIF (Source Interface Identifier) for memory interface. + - bit 14: DIF (Destination Interface Identifier) for peripheral interface. + - bit 30-24: PERID (Peripheral Identifier). + const: 1 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + const: dma_clk + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - "#dma-cells" + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/at91.h> + #include <dt-bindings/dma/at91.h> + #include <dt-bindings/interrupt-controller/irq.h> + dma-controller@f0008000 { + compatible = "atmel,sama5d4-dma"; + reg = <0xf0008000 0x1000>; + interrupts = <20 IRQ_TYPE_LEVEL_HIGH 0>; + #dma-cells = <1>; + clocks = <&pmc PMC_TYPE_PERIPHERAL 20>; + clock-names = "dma_clk"; + }; diff --git a/Documentation/devicetree/bindings/dma/atmel-xdma.txt b/Documentation/devicetree/bindings/dma/atmel-xdma.txt deleted file mode 100644 index 76d649b3a25d..000000000000 --- a/Documentation/devicetree/bindings/dma/atmel-xdma.txt +++ /dev/null @@ -1,54 +0,0 @@ -* Atmel Extensible Direct Memory Access Controller (XDMAC) - -* XDMA Controller -Required properties: -- compatible: Should be "atmel,sama5d4-dma", "microchip,sam9x60-dma" or - "microchip,sama7g5-dma" or - "microchip,sam9x7-dma", "atmel,sama5d4-dma". -- reg: Should contain DMA registers location and length. -- interrupts: Should contain DMA interrupt. -- #dma-cells: Must be <1>, used to represent the number of integer cells in -the dmas property of client devices. - - The 1st cell specifies the channel configuration register: - - bit 13: SIF, source interface identifier, used to get the memory - interface identifier, - - bit 14: DIF, destination interface identifier, used to get the peripheral - interface identifier, - - bit 30-24: PERID, peripheral identifier. - -Example: - -dma1: dma-controller@f0004000 { - compatible = "atmel,sama5d4-dma"; - reg = <0xf0004000 0x200>; - interrupts = <50 4 0>; - #dma-cells = <1>; -}; - - -* DMA clients -DMA clients connected to the Atmel XDMA controller must use the format -described in the dma.txt file, using a one-cell specifier for each channel. -The two cells in order are: -1. A phandle pointing to the DMA controller. -2. Channel configuration register. Configurable fields are: - - bit 13: SIF, source interface identifier, used to get the memory - interface identifier, - - bit 14: DIF, destination interface identifier, used to get the peripheral - interface identifier, - - bit 30-24: PERID, peripheral identifier. 
- -Example: - -i2c2: i2c@f8024000 { - compatible = "atmel,at91sam9x5-i2c"; - reg = <0xf8024000 0x4000>; - interrupts = <34 4 6>; - dmas = <&dma1 - (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) - | AT91_XDMAC_DT_PERID(6))>, - <&dma1 - (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) - | AT91_XDMAC_DT_PERID(7))>; - dma-names = "tx", "rx"; -}; diff --git a/Documentation/devicetree/bindings/dma/fsl,edma.yaml b/Documentation/devicetree/bindings/dma/fsl,edma.yaml index d54140f18d34..4f925469533e 100644 --- a/Documentation/devicetree/bindings/dma/fsl,edma.yaml +++ b/Documentation/devicetree/bindings/dma/fsl,edma.yaml @@ -26,9 +26,13 @@ properties: - fsl,imx93-edma3 - fsl,imx93-edma4 - fsl,imx95-edma5 + - nxp,s32g2-edma - items: - const: fsl,ls1028a-edma - const: fsl,vf610-edma + - items: + - const: nxp,s32g3-edma + - const: nxp,s32g2-edma reg: minItems: 1 @@ -221,6 +225,36 @@ allOf: properties: power-domains: false + - if: + properties: + compatible: + contains: + const: nxp,s32g2-edma + then: + properties: + clocks: + minItems: 2 + maxItems: 2 + clock-names: + items: + - const: dmamux0 + - const: dmamux1 + interrupts: + minItems: 3 + maxItems: 3 + interrupt-names: + items: + - const: tx-0-15 + - const: tx-16-31 + - const: err + reg: + minItems: 3 + maxItems: 3 + "#dma-cells": + const: 2 + dma-channels: + const: 32 + unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml index 877147e95ecc..d3f8c269916c 100644 --- a/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml +++ b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml @@ -13,9 +13,6 @@ description: | maintainers: - Jon Hunter <jonathanh@nvidia.com> -allOf: - - $ref: dma-controller.yaml# - properties: compatible: oneOf: @@ -29,7 +26,19 @@ properties: - const: nvidia,tegra186-adma reg: - maxItems: 1 + description: + The 'page' region describes the address space of the page + used for accessing the DMA channel registers. The 'global' + region describes the address space of the global DMA registers. + In the absence of the 'reg-names' property, there must be a + single entry that covers the address space of the global DMA + registers and the DMA channel registers. + minItems: 1 + maxItems: 2 + + reg-names: + minItems: 1 + maxItems: 2 interrupts: description: | @@ -63,6 +72,49 @@ required: - clocks - clock-names +allOf: + - $ref: dma-controller.yaml# + - if: + properties: + compatible: + contains: + enum: + - nvidia,tegra210-adma + then: + properties: + reg: + items: + - description: Full address space range of DMA registers. + + - if: + properties: + compatible: + contains: + enum: + - nvidia,tegra186-adma + then: + anyOf: + - properties: + reg: + items: + - description: Full address space range of DMA registers. + - properties: + reg: + items: + - description: Channel Page address space range of DMA registers. + reg-names: + items: + - const: page + - properties: + reg: + items: + - description: Channel Page address space range of DMA registers. + - description: Global Page address space range of DMA registers. 
+ reg-names: + items: + - const: page + - const: global + additionalProperties: false examples: diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 4ad56a409b9c..7052468b15c8 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -25,7 +25,9 @@ properties: - items: - enum: - qcom,qcm2290-gpi-dma + - qcom,qcs8300-gpi-dma - qcom,qdu1000-gpi-dma + - qcom,sa8775p-gpi-dma - qcom,sar2130p-gpi-dma - qcom,sc7280-gpi-dma - qcom,sdx75-gpi-dma @@ -35,10 +37,12 @@ properties: - qcom,sm8450-gpi-dma - qcom,sm8550-gpi-dma - qcom,sm8650-gpi-dma + - qcom,sm8750-gpi-dma - qcom,x1e80100-gpi-dma - const: qcom,sm6350-gpi-dma - items: - enum: + - qcom,qcs615-gpi-dma - qcom,sdm670-gpi-dma - qcom,sm6125-gpi-dma - qcom,sm8150-gpi-dma diff --git a/Documentation/devicetree/bindings/dma/stm32/st,stm32-dmamux.yaml b/Documentation/devicetree/bindings/dma/stm32/st,stm32-dmamux.yaml index f26c914a3a9a..b7bca1a83769 100644 --- a/Documentation/devicetree/bindings/dma/stm32/st,stm32-dmamux.yaml +++ b/Documentation/devicetree/bindings/dma/stm32/st,stm32-dmamux.yaml @@ -15,6 +15,16 @@ allOf: properties: "#dma-cells": const: 3 + description: | + Each cell represents the following: + 1. The mux input number/line for the request + 2. Bitfield representing DMA channel configuration that is passed + to the real DMA controller + 3. Bitfield representing device dependent DMA features passed to + the real DMA controller + + For bitfield definitions of cells 2 and 3, see the associated + bindings doc for the actual DMA controller in st,stm32-dma.yaml. compatible: const: st,stm32h7-dmamux diff --git a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml index 27b8e1636560..b5bc842c5a0e 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml @@ -34,6 +34,7 @@ properties: - ti,am62a-dmss-bcdma-csirx - ti,am64-dmss-bcdma - ti,j721s2-dmss-bcdma-csi + - ti,j722s-dmss-bcdma-csi reg: minItems: 3 @@ -196,7 +197,9 @@ allOf: properties: compatible: contains: - const: ti,j721s2-dmss-bcdma-csi + enum: + - ti,j721s2-dmss-bcdma-csi + - ti,j722s-dmss-bcdma-csi then: properties: ti,sci-rm-range-bchan: false diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml index f117471fb06f..e7ee0d9efed8 100644 --- a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml @@ -22,12 +22,12 @@ properties: oneOf: - items: - enum: - - qcom,qcs8300-ethqos - - const: qcom,sa8775p-ethqos + - qcom,qcs615-ethqos + - const: qcom,qcs404-ethqos - items: - enum: - - qcom,qcs615-ethqos - - const: qcom,sm8150-ethqos + - qcom,qcs8300-ethqos + - const: qcom,sa8775p-ethqos - enum: - qcom,qcs404-ethqos - qcom,sa8775p-ethqos diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml b/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml index d3cd7997879f..1b3de6678c08 100644 --- a/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml +++ b/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml @@ -13,6 +13,7 @@ properties: compatible: enum: - rockchip,rk3568-naneng-combphy + - rockchip,rk3576-naneng-combphy - rockchip,rk3588-naneng-combphy reg: diff --git 
a/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml index 58ce2d91d28c..f60804687412 100644 --- a/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml @@ -15,12 +15,21 @@ description: properties: compatible: - enum: - - qcom,ipq6018-qmp-pcie-phy - - qcom,ipq8074-qmp-gen3-pcie-phy - - qcom,ipq8074-qmp-pcie-phy - - qcom,ipq9574-qmp-gen3x1-pcie-phy - - qcom,ipq9574-qmp-gen3x2-pcie-phy + oneOf: + - enum: + - qcom,ipq6018-qmp-pcie-phy + - qcom,ipq8074-qmp-gen3-pcie-phy + - qcom,ipq8074-qmp-pcie-phy + - qcom,ipq9574-qmp-gen3x1-pcie-phy + - qcom,ipq9574-qmp-gen3x2-pcie-phy + - items: + - enum: + - qcom,ipq5424-qmp-gen3x1-pcie-phy + - const: qcom,ipq9574-qmp-gen3x1-pcie-phy + - items: + - enum: + - qcom,ipq5424-qmp-gen3x2-pcie-phy + - const: qcom,ipq9574-qmp-gen3x2-pcie-phy reg: items: diff --git a/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml index 4aed4b5d65ec..39851ba9de43 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml @@ -18,6 +18,7 @@ properties: oneOf: - items: - enum: + - qcom,ipq5424-qusb2-phy - qcom,ipq6018-qusb2-phy - qcom,ipq8074-qusb2-phy - qcom,ipq9574-qusb2-phy diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml index 13fdf5f1beba..89391649e0b5 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml @@ -16,8 +16,10 @@ description: properties: compatible: enum: + - qcom,qcs615-qmp-gen3x1-pcie-phy - qcom,sa8775p-qmp-gen4x2-pcie-phy - qcom,sa8775p-qmp-gen4x4-pcie-phy + - qcom,sar2130p-qmp-gen3x2-pcie-phy - qcom,sc8180x-qmp-pcie-phy - qcom,sc8280xp-qmp-gen3x1-pcie-phy - qcom,sc8280xp-qmp-gen3x2-pcie-phy @@ -32,6 +34,7 @@ properties: - qcom,sm8250-qmp-gen3x2-pcie-phy - qcom,sm8250-qmp-modem-pcie-phy - qcom,sm8350-qmp-gen3x1-pcie-phy + - qcom,sm8350-qmp-gen3x2-pcie-phy - qcom,sm8450-qmp-gen3x1-pcie-phy - qcom,sm8450-qmp-gen4x2-pcie-phy - qcom,sm8550-qmp-gen3x2-pcie-phy @@ -139,6 +142,7 @@ allOf: compatible: contains: enum: + - qcom,sar2130p-qmp-gen3x2-pcie-phy - qcom,sc8180x-qmp-pcie-phy - qcom,sdm845-qhp-pcie-phy - qcom,sdm845-qmp-pcie-phy @@ -149,6 +153,7 @@ allOf: - qcom,sm8250-qmp-gen3x2-pcie-phy - qcom,sm8250-qmp-modem-pcie-phy - qcom,sm8350-qmp-gen3x1-pcie-phy + - qcom,sm8350-qmp-gen3x2-pcie-phy - qcom,sm8450-qmp-gen3x1-pcie-phy - qcom,sm8450-qmp-gen3x2-pcie-phy - qcom,sm8550-qmp-gen3x2-pcie-phy @@ -167,6 +172,7 @@ allOf: compatible: contains: enum: + - qcom,qcs615-qmp-gen3x1-pcie-phy - qcom,sc8280xp-qmp-gen3x1-pcie-phy - qcom,sc8280xp-qmp-gen3x2-pcie-phy - qcom,sc8280xp-qmp-gen3x4-pcie-phy diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml index baf5134ea3d8..a1b55168e050 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml @@ -16,6 +16,7 @@ description: properties: compatible: enum: + - qcom,ipq5424-qmp-usb3-phy - qcom,ipq6018-qmp-usb3-phy - qcom,ipq8074-qmp-usb3-phy - qcom,ipq9574-qmp-usb3-phy @@ -89,6 +90,7 @@ allOf: 
compatible: contains: enum: + - qcom,ipq5424-qmp-usb3-phy - qcom,ipq6018-qmp-usb3-phy - qcom,ipq8074-qmp-usb3-phy - qcom,ipq9574-qmp-usb3-phy diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml index 2d0d7e9e6431..358a6736a951 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml @@ -16,6 +16,7 @@ description: properties: compatible: enum: + - qcom,sar2130p-qmp-usb3-dp-phy - qcom,sc7180-qmp-usb3-dp-phy - qcom,sc7280-qmp-usb3-dp-phy - qcom,sc8180x-qmp-usb3-dp-phy @@ -127,6 +128,7 @@ allOf: properties: compatible: enum: + - qcom,sar2130p-qmp-usb3-dp-phy - qcom,sc8280xp-qmp-usb43dp-phy - qcom,sm6350-qmp-usb3-dp-phy - qcom,sm8550-qmp-usb3-dp-phy diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index acb5b9ba6f04..2c72f148a74b 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -26,6 +26,18 @@ description: | allOf: - $ref: /schemas/cpu.yaml# - $ref: extensions.yaml + - if: + not: + properties: + compatible: + contains: + enum: + - thead,c906 + - thead,c910 + - thead,c920 + then: + properties: + thead,vlenb: false properties: compatible: @@ -96,6 +108,13 @@ properties: description: The blocksize in bytes for the Zicboz cache operations. + thead,vlenb: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + VLEN/8, the vector register length in bytes. This property is required on + thead systems where the vector register length is not identical on all harts, or + the vlenb CSR is not available. + # RISC-V has multiple properties for cache op block sizes as the sizes # differ between individual CBO extensions cache-op-block-size: false diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index 9c7dd7e75e0c..a63b994e0763 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -621,6 +621,10 @@ properties: latency, as ratified in commit 56ed795 ("Update riscv-crypto-spec-vector.adoc") of riscv-crypto. + # vendor extensions, each extension sorted alphanumerically under the + # vendor they belong to. Vendors are sorted alphanumerically as well. + + # Andes - const: xandespmu description: The Andes Technology performance monitor extension for counter overflow @@ -628,6 +632,12 @@ properties: Registers in the AX45MP datasheet. https://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf + # T-HEAD + - const: xtheadvector + description: + The T-HEAD specific 0.7.1 vector implementation as written in + https://github.com/T-head-Semi/thead-extension-spec/blob/95358cb2cca9489361c61d335e03d3134b14133f/xtheadvector.adoc. 
+ allOf: # Zcb depends on Zca - if: diff --git a/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml b/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml index a14b52178c4b..2599b847f406 100644 --- a/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml +++ b/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml @@ -14,9 +14,13 @@ maintainers: properties: compatible: - enum: - - fsl,imx1-rtc - - fsl,imx21-rtc + oneOf: + - const: fsl,imx1-rtc + - const: fsl,imx21-rtc + - items: + - enum: + - fsl,imx31-rtc + - const: fsl,imx21-rtc reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml b/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml index 5aa00617291c..1f0e6787a746 100644 --- a/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml +++ b/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/ti,pcm1681.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments PCM1681 8-channel PWM Processor +title: Texas Instruments PCM1681 8-channel Digital-to-Analog Converter maintainers: - Shenghao Ding <shenghao-ding@ti.com> diff --git a/Documentation/filesystems/debugfs.rst b/Documentation/filesystems/debugfs.rst index dc35da8b8792..f7f977ffbf8d 100644 --- a/Documentation/filesystems/debugfs.rst +++ b/Documentation/filesystems/debugfs.rst @@ -211,18 +211,16 @@ seq_file content. There are a couple of other directory-oriented helper functions:: - struct dentry *debugfs_rename(struct dentry *old_dir, - struct dentry *old_dentry, - struct dentry *new_dir, - const char *new_name); + struct dentry *debugfs_change_name(struct dentry *dentry, + const char *fmt, ...); struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *target); -A call to debugfs_rename() will give a new name to an existing debugfs -file, possibly in a different directory. The new_name must not exist prior -to the call; the return value is old_dentry with updated information. +A call to debugfs_change_name() will give a new name to an existing debugfs +file, always in the same directory. The new_name must not exist prior +to the call; the return value is 0 on success and -E... on failuer. Symbolic links can be created with debugfs_create_symlink(). There is one important thing that all debugfs users must take into account: diff --git a/Documentation/filesystems/fuse-io-uring.rst b/Documentation/filesystems/fuse-io-uring.rst new file mode 100644 index 000000000000..d73dd0dbd238 --- /dev/null +++ b/Documentation/filesystems/fuse-io-uring.rst @@ -0,0 +1,99 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================= +FUSE-over-io-uring design documentation +======================================= + +This documentation covers basic details how the fuse +kernel/userspace communication through io-uring is configured +and works. For generic details about FUSE see fuse.rst. + +This document also covers the current interface, which is +still in development and might change. + +Limitations +=========== +As of now not all requests types are supported through io-uring, userspace +is required to also handle requests through /dev/fuse after io-uring setup +is complete. Specifically notifications (initiated from the daemon side) +and interrupts. + +Fuse io-uring configuration +=========================== + +Fuse kernel requests are queued through the classical /dev/fuse +read/write interface - until io-uring setup is complete. 
+ +In order to set up fuse-over-io-uring fuse-server (user-space) +needs to submit SQEs (opcode = IORING_OP_URING_CMD) to the /dev/fuse +connection file descriptor. Initial submit is with the sub command +FUSE_URING_REQ_REGISTER, which will just register entries to be +available in the kernel. + +Once at least one entry per queue is submitted, kernel starts +to enqueue to ring queues. +Note, every CPU core has its own fuse-io-uring queue. +Userspace handles the CQE/fuse-request and submits the result as +subcommand FUSE_URING_REQ_COMMIT_AND_FETCH - kernel completes +the requests and also marks the entry available again. If there are +pending requests waiting the request will be immediately submitted +to the daemon again. + +Initial SQE +-----------:: + + | | FUSE filesystem daemon + | | + | | >io_uring_submit() + | | IORING_OP_URING_CMD / + | | FUSE_URING_CMD_REGISTER + | | [wait cqe] + | | >io_uring_wait_cqe() or + | | >io_uring_submit_and_wait() + | | + | >fuse_uring_cmd() | + | >fuse_uring_register() | + + +Sending requests with CQEs +--------------------------:: + + | | FUSE filesystem daemon + | | [waiting for CQEs] + | "rm /mnt/fuse/file" | + | | + | >sys_unlink() | + | >fuse_unlink() | + | [allocate request] | + | >fuse_send_one() | + | ... | + | >fuse_uring_queue_fuse_req | + | [queue request on fg queue] | + | >fuse_uring_add_req_to_ring_ent() | + | ... | + | >fuse_uring_copy_to_ring() | + | >io_uring_cmd_done() | + | >request_wait_answer() | + | [sleep on req->waitq] | + | | [receives and handles CQE] + | | [submit result and fetch next] + | | >io_uring_submit() + | | IORING_OP_URING_CMD/ + | | FUSE_URING_CMD_COMMIT_AND_FETCH + | >fuse_uring_cmd() | + | >fuse_uring_commit_fetch() | + | >fuse_uring_commit() | + | >fuse_uring_copy_from_ring() | + | [ copy the result to the fuse req] | + | >fuse_uring_req_end() | + | >fuse_request_end() | + | [wake up req->waitq] | + | >fuse_uring_next_fuse_req | + | [wait or handle next req] | + | | + | [req->waitq woken up] | + | <fuse_unlink() | + | <sys_unlink() | + + + diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 44e9e77ffe0d..2636f2a41bd3 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -98,6 +98,7 @@ Documentation for filesystem implementations. 
hpfs fuse fuse-io + fuse-io-uring inotify isofs nilfs2 diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index f5e3676db954..d20a32b77b60 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -17,7 +17,8 @@ dentry_operations prototypes:: - int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_revalidate)(struct inode *, const struct qstr *, + struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, @@ -30,6 +31,8 @@ prototypes:: struct vfsmount *(*d_automount)(struct path *path); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum d_real_type type); + bool (*d_unalias_trylock)(const struct dentry *); + void (*d_unalias_unlock)(const struct dentry *); locking rules: @@ -49,6 +52,8 @@ d_dname: no no no no d_automount: no no yes no d_manage: no no yes (ref-walk) maybe d_real no no yes no +d_unalias_trylock yes no no no +d_unalias_unlock yes no no no ================== =========== ======== ============== ======== inode_operations diff --git a/Documentation/filesystems/nfs/localio.rst b/Documentation/filesystems/nfs/localio.rst index bd1967e2eab3..79808b37d745 100644 --- a/Documentation/filesystems/nfs/localio.rst +++ b/Documentation/filesystems/nfs/localio.rst @@ -218,64 +218,30 @@ NFS Client and Server Interlock =============================== LOCALIO provides the nfs_uuid_t object and associated interfaces to -allow proper network namespace (net-ns) and NFSD object refcounting: - - We don't want to keep a long-term counted reference on each NFSD's - net-ns in the client because that prevents a server container from - completely shutting down. - - So we avoid taking a reference at all and rely on the per-cpu - reference to the server (detailed below) being sufficient to keep - the net-ns active. This involves allowing the NFSD's net-ns exit - code to iterate all active clients and clear their ->net pointers - (which are needed to find the per-cpu-refcount for the nfsd_serv). - - Details: - - - Embed nfs_uuid_t in nfs_client. nfs_uuid_t provides a list_head - that can be used to find the client. It does add the 16-byte - uuid_t to nfs_client so it is bigger than needed (given that - uuid_t is only used during the initial NFS client and server - LOCALIO handshake to determine if they are local to each other). - If that is really a problem we can find a fix. - - - When the nfs server confirms that the uuid_t is local, it moves - the nfs_uuid_t onto a per-net-ns list in NFSD's nfsd_net. - - - When each server's net-ns is shutting down - in a "pre_exit" - handler, all these nfs_uuid_t have their ->net cleared. There is - an rcu_synchronize() call between pre_exit() handlers and exit() - handlers so any caller that sees nfs_uuid_t ->net as not NULL can - safely manage the per-cpu-refcount for nfsd_serv. - - - The client's nfs_uuid_t is passed to nfsd_open_local_fh() so it - can safely dereference ->net in a private rcu_read_lock() section - to allow safe access to the associated nfsd_net and nfsd_serv. - -So LOCALIO required the introduction and use of NFSD's percpu_ref to -interlock nfsd_destroy_serv() and nfsd_open_local_fh(), to ensure each -nn->nfsd_serv is not destroyed while in use by nfsd_open_local_fh(), and +allow proper network namespace (net-ns) and NFSD object refcounting. 
+ +LOCALIO required the introduction and use of NFSD's percpu nfsd_net_ref +to interlock nfsd_shutdown_net() and nfsd_open_local_fh(), to ensure +each net-ns is not destroyed while in use by nfsd_open_local_fh(), and warrants a more detailed explanation: - nfsd_open_local_fh() uses nfsd_serv_try_get() before opening its + nfsd_open_local_fh() uses nfsd_net_try_get() before opening its nfsd_file handle and then the caller (NFS client) must drop the - reference for the nfsd_file and associated nn->nfsd_serv using - nfs_file_put_local() once it has completed its IO. + reference for the nfsd_file and associated net-ns using + nfsd_file_put_local() once it has completed its IO. This interlock working relies heavily on nfsd_open_local_fh() being afforded the ability to safely deal with the possibility that the NFSD's net-ns (and nfsd_net by association) may have been destroyed - by nfsd_destroy_serv() via nfsd_shutdown_net() -- which is only - possible given the nfs_uuid_t ->net pointer managemenet detailed - above. - -All told, this elaborate interlock of the NFS client and server has been -verified to fix an easy to hit crash that would occur if an NFSD -instance running in a container, with a LOCALIO client mounted, is -shutdown. Upon restart of the container and associated NFSD the client -would go on to crash due to NULL pointer dereference that occurred due -to the LOCALIO client's attempting to nfsd_open_local_fh(), using -nn->nfsd_serv, without having a proper reference on nn->nfsd_serv. + by nfsd_destroy_serv() via nfsd_shutdown_net(). + +This interlock of the NFS client and server has been verified to fix an +easy to hit crash that would occur if an NFSD instance running in a +container, with a LOCALIO client mounted, is shutdown. Upon restart of +the container and associated NFSD, the client would go on to crash due +to NULL pointer dereference that occurred due to the LOCALIO client's +attempting to nfsd_open_local_fh() without having a proper reference on +NFSD's net-ns. NFS Client issues IO instead of Server ====================================== @@ -306,10 +272,26 @@ is issuing IO to the underlying local filesystem that it is sharing with the NFS server. See: fs/nfs/localio.c:nfs_local_doio() and fs/nfs/localio.c:nfs_local_commit(). +With normal NFS that makes use of RPC to issue IO to the server, if an +application uses O_DIRECT the NFS client will bypass the pagecache but +the NFS server will not. The NFS server's use of buffered IO affords +applications to be less precise with their alignment when issuing IO to +the NFS client. But if all applications properly align their IO, LOCALIO +can be configured to use end-to-end O_DIRECT semantics from the NFS +client to the underlying local filesystem, that it is sharing with +the NFS server, by setting the 'localio_O_DIRECT_semantics' nfs module +parameter to Y, e.g.: + + echo Y > /sys/module/nfs/parameters/localio_O_DIRECT_semantics + +Once enabled, it will cause LOCALIO to use end-to-end O_DIRECT semantics +(but again, this may cause IO to fail if applications do not properly +align their IO). + Security ======== -Localio is only supported when UNIX-style authentication (AUTH_UNIX, aka +LOCALIO is only supported when UNIX-style authentication (AUTH_UNIX, aka AUTH_SYS) is used. Care is taken to ensure the same NFS security mechanisms are used @@ -324,6 +306,24 @@ client is afforded this same level of access (albeit in terms of the NFS protocol via SUNRPC). 
No other namespaces (user, mount, etc) have been altered or purposely extended from the server to the client. +Module Parameters +================= + +/sys/module/nfs/parameters/localio_enabled (bool) +controls if LOCALIO is enabled, defaults to Y. If client and server are +local but 'localio_enabled' is set to N then LOCALIO will not be used. + +/sys/module/nfs/parameters/localio_O_DIRECT_semantics (bool) +controls if O_DIRECT extends down to the underlying filesystem, defaults +to N. Application IO must be logical blocksize aligned, otherwise +O_DIRECT will fail. + +/sys/module/nfsv3/parameters/nfs3_localio_probe_throttle (uint) +controls if NFSv3 read and write IOs will trigger (re)enabling of +LOCALIO every N (nfs3_localio_probe_throttle) IOs, defaults to 0 +(disabled). Must be power-of-2, admin keeps all the pieces if they +misconfigure (too low a value or non-power-of-2). + Testing ======= diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index c1c121055204..1639e78e3146 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1141,3 +1141,19 @@ pointer are gone. set_blocksize() takes opened struct file instead of struct block_device now and it *must* be opened exclusive. + +--- + +** mandatory** + +->d_revalidate() gets two extra arguments - inode of parent directory and +name our dentry is expected to have. Both are stable (dir is pinned in +non-RCU case and will stay around during the call in RCU case, and name +is guaranteed to stay unchanging). Your instance doesn't have to use +either, but it often helps to avoid a lot of painful boilerplate. +Note that while name->name is stable and NUL-terminated, it may (and +often will) have name->name[name->len] equal to '/' rather than '\0' - +in normal case it points into the pathname being looked up. +NOTE: if you need something like full path from the root of filesystem, +you are still on your own - this assists with simple cases, but it's not +magic. diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index 0b18af3f954e..31eea688609a 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -1251,7 +1251,8 @@ defined: .. code-block:: c struct dentry_operations { - int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_revalidate)(struct inode *, const struct qstr *, + struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, @@ -1264,6 +1265,8 @@ defined: struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum d_real_type type); + bool (*d_unalias_trylock)(const struct dentry *); + void (*d_unalias_unlock)(const struct dentry *); }; ``d_revalidate`` @@ -1427,6 +1430,25 @@ defined: For non-regular files, the 'dentry' argument is returned. +``d_unalias_trylock`` + if present, will be called by d_splice_alias() before moving a + preexisting attached alias. Returning false prevents __d_move(), + making d_splice_alias() fail with -ESTALE. + + Rationale: setting FS_RENAME_DOES_D_MOVE will prevent d_move() + and d_exchange() calls from the outside of filesystem methods; + however, it does not guarantee that attached dentries won't + be renamed or moved by d_splice_alias() finding a preexisting + alias for a directory inode. 
Normally we would not care; + however, something that wants to stabilize the entire path to + root over a blocking operation might need that. See 9p for one + (and hopefully only) example. + +``d_unalias_unlock`` + should be paired with ``d_unalias_trylock``; that one is called after + __d_move() call in __d_unalias(). + + Each dentry has a pointer to its parent dentry, as well as a hash list of child dentries. Child dentries are basically like files in a directory. diff --git a/Documentation/kbuild/gendwarfksyms.rst b/Documentation/kbuild/gendwarfksyms.rst new file mode 100644 index 000000000000..e4beaae7e456 --- /dev/null +++ b/Documentation/kbuild/gendwarfksyms.rst @@ -0,0 +1,308 @@ +======================= +DWARF module versioning +======================= + +1. Introduction +=============== + +When CONFIG_MODVERSIONS is enabled, symbol versions for modules +are typically calculated from preprocessed source code using the +**genksyms** tool. However, this is incompatible with languages such +as Rust, where the source code has insufficient information about +the resulting ABI. With CONFIG_GENDWARFKSYMS (and CONFIG_DEBUG_INFO) +selected, **gendwarfksyms** is used instead to calculate symbol versions +from the DWARF debugging information, which contains the necessary +details about the final module ABI. + +1.1. Usage +========== + +gendwarfksyms accepts a list of object files on the command line, and a +list of symbol names (one per line) in standard input:: + + Usage: gendwarfksyms [options] elf-object-file ... < symbol-list + + Options: + -d, --debug Print debugging information + --dump-dies Dump DWARF DIE contents + --dump-die-map Print debugging information about die_map changes + --dump-types Dump type strings + --dump-versions Dump expanded type strings used for symbol versions + -s, --stable Support kABI stability features + -T, --symtypes file Write a symtypes file + -h, --help Print this message + + +2. Type information availability +================================ + +While symbols are typically exported in the same translation unit (TU) +where they're defined, it's also perfectly fine for a TU to export +external symbols. For example, this is done when calculating symbol +versions for exports in stand-alone assembly code. + +To ensure the compiler emits the necessary DWARF type information in the +TU where symbols are actually exported, gendwarfksyms adds a pointer +to exported symbols in the `EXPORT_SYMBOL()` macro using the following +macro:: + + #define __GENDWARFKSYMS_EXPORT(sym) \ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ + __section(".discard.gendwarfksyms") = &sym; + + +When a symbol pointer is found in DWARF, gendwarfksyms can use its +type for calculating symbol versions even if the symbol is defined +elsewhere. The name of the symbol pointer is expected to start with +`__gendwarfksyms_ptr_`, followed by the name of the exported symbol. + +3. Symtypes output format +========================= + +Similarly to genksyms, gendwarfksyms supports writing a symtypes +file for each processed object that contain types for exported +symbols and each referenced type that was used in calculating symbol +versions. These files can be useful when trying to determine what +exactly caused symbol versions to change between builds. To generate +symtypes files during a kernel build, set `KBUILD_SYMTYPES=1`. + +Matching the existing format, the first column of each line contains +either a type reference or a symbol name. 
Type references have a +one-letter prefix followed by "#" and the name of the type. Four +reference types are supported:: + + e#<type> = enum + s#<type> = struct + t#<type> = typedef + u#<type> = union + +Type names with spaces in them are wrapped in single quotes, e.g.:: + + s#'core::result::Result<u8, core::num::error::ParseIntError>' + +The rest of the line contains a type string. Unlike with genksyms that +produces C-style type strings, gendwarfksyms uses the same simple parsed +DWARF format produced by **--dump-dies**, but with type references +instead of fully expanded strings. + +4. Maintaining a stable kABI +============================ + +Distribution maintainers often need the ability to make ABI compatible +changes to kernel data structures due to LTS updates or backports. Using +the traditional `#ifndef __GENKSYMS__` to hide these changes from symbol +versioning won't work when processing object files. To support this +use case, gendwarfksyms provides kABI stability features designed to +hide changes that won't affect the ABI when calculating versions. These +features are all gated behind the **--stable** command line flag and are +not used in the mainline kernel. To use stable features during a kernel +build, set `KBUILD_GENDWARFKSYMS_STABLE=1`. + +Examples for using these features are provided in the +**scripts/gendwarfksyms/examples** directory, including helper macros +for source code annotation. Note that as these features are only used to +transform the inputs for symbol versioning, the user is responsible for +ensuring that their changes actually won't break the ABI. + +4.1. kABI rules +=============== + +kABI rules allow distributions to fine-tune certain parts +of gendwarfksyms output and thus control how symbol +versions are calculated. These rules are defined in the +`.discard.gendwarfksyms.kabi_rules` section of the object file and +consist of simple null-terminated strings with the following structure:: + + version\0type\0target\0value\0 + +This string sequence is repeated as many times as needed to express all +the rules. The fields are as follows: + +- `version`: Ensures backward compatibility for future changes to the + structure. Currently expected to be "1". +- `type`: Indicates the type of rule being applied. +- `target`: Specifies the target of the rule, typically the fully + qualified name of the DWARF Debugging Information Entry (DIE). +- `value`: Provides rule-specific data. + +The following helper macro, for example, can be used to specify rules +in the source code:: + + #define __KABI_RULE(hint, target, value) \ + static const char __PASTE(__gendwarfksyms_rule_, \ + __COUNTER__)[] __used __aligned(1) \ + __section(".discard.gendwarfksyms.kabi_rules") = \ + "1\0" #hint "\0" #target "\0" #value + + +Currently, only the rules discussed in this section are supported, but +the format is extensible enough to allow further rules to be added as +need arises. + +4.1.1. Managing definition visibility +===================================== + +A declaration can change into a full definition when additional includes +are pulled into the translation unit. This changes the versions of any +symbol that references the type even if the ABI remains unchanged. As +it may not be possible to drop includes without breaking the build, the +`declonly` rule can be used to specify a type as declaration-only, even +if the debugging information contains the full definition. 
+ +The rule fields are expected to be as follows: + +- `type`: "declonly" +- `target`: The fully qualified name of the target data structure + (as shown in **--dump-dies** output). +- `value`: This field is ignored. + +Using the `__KABI_RULE` macro, this rule can be defined as:: + + #define KABI_DECLONLY(fqn) __KABI_RULE(declonly, fqn, ) + +Example usage:: + + struct s { + /* definition */ + }; + + KABI_DECLONLY(s); + +4.1.2. Adding enumerators +========================= + +For enums, all enumerators and their values are included in calculating +symbol versions, which becomes a problem if we later need to add more +enumerators without changing symbol versions. The `enumerator_ignore` +rule allows us to hide named enumerators from the input. + +The rule fields are expected to be as follows: + +- `type`: "enumerator_ignore" +- `target`: The fully qualified name of the target enum + (as shown in **--dump-dies** output) and the name of the + enumerator field separated by a space. +- `value`: This field is ignored. + +Using the `__KABI_RULE` macro, this rule can be defined as:: + + #define KABI_ENUMERATOR_IGNORE(fqn, field) \ + __KABI_RULE(enumerator_ignore, fqn field, ) + +Example usage:: + + enum e { + A, B, C, D, + }; + + KABI_ENUMERATOR_IGNORE(e, B); + KABI_ENUMERATOR_IGNORE(e, C); + +If the enum additionally includes an end marker and new values must +be added in the middle, we may need to use the old value for the last +enumerator when calculating versions. The `enumerator_value` rule allows +us to override the value of an enumerator for version calculation: + +- `type`: "enumerator_value" +- `target`: The fully qualified name of the target enum + (as shown in **--dump-dies** output) and the name of the + enumerator field separated by a space. +- `value`: Integer value used for the field. + +Using the `__KABI_RULE` macro, this rule can be defined as:: + + #define KABI_ENUMERATOR_VALUE(fqn, field, value) \ + __KABI_RULE(enumerator_value, fqn field, value) + +Example usage:: + + enum e { + A, B, C, LAST, + }; + + KABI_ENUMERATOR_IGNORE(e, C); + KABI_ENUMERATOR_VALUE(e, LAST, 2); + +4.3. Adding structure members +============================= + +Perhaps the most common ABI compatible change is adding a member to a +kernel data structure. When changes to a structure are anticipated, +distribution maintainers can pre-emptively reserve space in the +structure and take it into use later without breaking the ABI. If +changes are needed to data structures without reserved space, existing +alignment holes can potentially be used instead. While kABI rules could +be added for these type of changes, using unions is typically a more +natural method. This section describes gendwarfksyms support for using +reserved space in data structures and hiding members that don't change +the ABI when calculating symbol versions. + +4.3.1. Reserving space and replacing members +============================================ + +Space is typically reserved for later use by appending integer types, or +arrays, to the end of the data structure, but any type can be used. 
Each +reserved member needs a unique name, but as the actual purpose is usually +not known at the time the space is reserved, for convenience, names that +start with `__kabi_` are left out when calculating symbol versions:: + + struct s { + long a; + long __kabi_reserved_0; /* reserved for future use */ + }; + +The reserved space can be taken into use by wrapping the member in a +union, which includes the original type and the replacement member:: + + struct s { + long a; + union { + long __kabi_reserved_0; /* original type */ + struct b b; /* replaced field */ + }; + }; + +If the `__kabi_` naming scheme was used when reserving space, the name +of the first member of the union must start with `__kabi_reserved`. This +ensures the original type is used when calculating versions, but the name +is again left out. The rest of the union is ignored. + +If we're replacing a member that doesn't follow this naming convention, +we also need to preserve the original name to avoid changing versions, +which we can do by changing the first union member's name to start with +`__kabi_renamed` followed by the original name. + +The examples include `KABI_(RESERVE|USE|REPLACE)*` macros that help +simplify the process and also ensure the replacement member is correctly +aligned and its size won't exceed the reserved space. + +4.3.2. Hiding members +===================== + +Predicting which structures will require changes during the support +timeframe isn't always possible, in which case one might have to resort +to placing new members into existing alignment holes:: + + struct s { + int a; + /* a 4-byte alignment hole */ + unsigned long b; + }; + + +While this won't change the size of the data structure, one needs to +be able to hide the added members from symbol versioning. Similarly +to reserved fields, this can be accomplished by wrapping the added +member to a union where one of the fields has a name starting with +`__kabi_ignored`:: + + struct s { + int a; + union { + char __kabi_ignored_0; + int n; + }; + unsigned long b; + }; + +With **--stable**, both versions produce the same symbol version. diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst index cee2f99f734b..e82af05cd652 100644 --- a/Documentation/kbuild/index.rst +++ b/Documentation/kbuild/index.rst @@ -21,6 +21,7 @@ Kernel Build System reproducible-builds gcc-plugins llvm + gendwarfksyms .. only:: subproject and html diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst index 101de236cd0c..a42f00d8cb90 100644 --- a/Documentation/kbuild/modules.rst +++ b/Documentation/kbuild/modules.rst @@ -423,6 +423,26 @@ Symbols From the Kernel (vmlinux + modules) 1) It lists all exported symbols from vmlinux and all modules. 2) It lists the CRC if CONFIG_MODVERSIONS is enabled. +Version Information Formats +--------------------------- + + Exported symbols have information stored in __ksymtab or __ksymtab_gpl + sections. Symbol names and namespaces are stored in __ksymtab_strings, + using a format similar to the string table used for ELF. If + CONFIG_MODVERSIONS is enabled, the CRCs corresponding to exported + symbols will be added to the __kcrctab or __kcrctab_gpl. + + If CONFIG_BASIC_MODVERSIONS is enabled (default with + CONFIG_MODVERSIONS), imported symbols will have their symbol name and + CRC stored in the __versions section of the importing module. This + mode only supports symbols of length up to 64 bytes. 
+ + If CONFIG_EXTENDED_MODVERSIONS is enabled (required to enable both + CONFIG_MODVERSIONS and CONFIG_RUST at the same time), imported symbols + will have their symbol name recorded in the __version_ext_names + section as a series of concatenated, null-terminated strings. CRCs for + these symbols will be recorded in the __version_ext_crcs section. + Symbols and External Modules ---------------------------- diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst index 62519d38c58b..b018ce346392 100644 --- a/Documentation/networking/can.rst +++ b/Documentation/networking/can.rst @@ -699,10 +699,10 @@ RAW socket option CAN_RAW_JOIN_FILTERS The CAN_RAW socket can set multiple CAN identifier specific filters that lead to multiple filters in the af_can.c filter processing. These filters -are indenpendent from each other which leads to logical OR'ed filters when +are independent from each other which leads to logical OR'ed filters when applied (see :ref:`socketcan-rawfilter`). -This socket option joines the given CAN filters in the way that only CAN +This socket option joins the given CAN filters in the way that only CAN frames are passed to user space that matched *all* given CAN filters. The semantic for the applied filters is therefore changed to a logical AND. diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index dc45c0211353..03e1d3610333 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -41,7 +41,7 @@ blackhole_timeout - INTEGER (seconds) MPTCP is re-enabled and will reset to the initial value when the blackhole issue goes away. - 0 to disable the blackhole detection. + 0 to disable the blackhole detection. This is a per-namespace sysctl. Default: 3600 diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst index 6083210ab2a4..f970a2be271a 100644 --- a/Documentation/networking/napi.rst +++ b/Documentation/networking/napi.rst @@ -362,7 +362,7 @@ It is expected that ``irq-suspend-timeout`` will be set to a value much larger than ``gro_flush_timeout`` as ``irq-suspend-timeout`` should suspend IRQs for the duration of one userland processing cycle. -While it is not stricly necessary to use ``napi_defer_hard_irqs`` and +While it is not strictly necessary to use ``napi_defer_hard_irqs`` and ``gro_flush_timeout`` to use IRQ suspension, their use is strongly recommended. diff --git a/Documentation/power/video.rst b/Documentation/power/video.rst index 337a2ba9f32f..8ab2458d1304 100644 --- a/Documentation/power/video.rst +++ b/Documentation/power/video.rst @@ -190,7 +190,7 @@ Toshiba Portege 3020CT s3_mode (3) Toshiba Satellite 4030CDT s3_mode (3) (S1 also works OK) Toshiba Satellite 4080XCDT s3_mode (3) (S1 also works OK) Toshiba Satellite 4090XCDT ??? [#f1]_ -Toshiba Satellite P10-554 s3_bios,s3_mode (4)[#f3]_ +Toshiba Satellite P10-554 s3_bios,s3_mode (4) [#f3]_ Toshiba M30 (2) xor X with nvidia driver using internal AGP Uniwill 244IIO ??? 
[#f1]_ =============================== =============================================== diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 82b5e378eebf..a0beca805362 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -59,7 +59,6 @@ iptables 1.4.2 iptables -V openssl & libcrypto 1.0.0 openssl version bc 1.06.95 bc --version Sphinx\ [#f1]_ 2.4.4 sphinx-build --version -cpio any cpio --version GNU tar 1.28 tar --version gtags (optional) 6.6.5 gtags --version mkimage (optional) 2017.01 mkimage --version @@ -536,11 +535,6 @@ mcelog - <https://www.mcelog.org/> -cpio ----- - -- <https://www.gnu.org/software/cpio/> - Networking ********** diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index bf5aff018c2f..6d1465315df3 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -312,6 +312,7 @@ Code Seq# Include File Comments <mailto:oe@port.de> 'z' 10-4F drivers/s390/crypto/zcrypt_api.h conflict! '|' 00-7F linux/media.h +'|' 80-9F samples/ Any sample and example drivers 0x80 00-1F linux/fb.h 0x81 00-1F linux/vduse.h 0x89 00-06 arch/x86/include/asm/sockios.h diff --git a/MAINTAINERS b/MAINTAINERS index 4fcd5733b656..857b3014f853 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -987,6 +987,12 @@ L: linux-edac@vger.kernel.org S: Supported F: drivers/ras/amd/atl/* +AMD AE4DMA DRIVER +M: Basavaraj Natikar <Basavaraj.Natikar@amd.com> +L: dmaengine@vger.kernel.org +S: Supported +F: drivers/dma/amd/ae4dma/ + AMD AXI W1 DRIVER M: Kris Chaplin <kris.chaplin@amd.com> R: Thomas Delev <thomas.delev@amd.com> @@ -1179,8 +1185,8 @@ F: tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py AMD PTDMA DRIVER M: Basavaraj Natikar <Basavaraj.Natikar@amd.com> L: dmaengine@vger.kernel.org -S: Maintained -F: drivers/dma/ptdma/ +S: Supported +F: drivers/dma/amd/ptdma/ AMD QDMA DRIVER M: Nishad Saraf <nishads@amd.com> @@ -4096,6 +4102,7 @@ S: Supported W: http://www.bluez.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git +F: Documentation/ABI/stable/sysfs-class-bluetooth F: include/net/bluetooth/ F: net/bluetooth/ @@ -5366,6 +5373,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git F: drivers/char/ F: drivers/misc/ F: include/linux/miscdevice.h +F: samples/rust/rust_misc_device.rs X: drivers/char/agp/ X: drivers/char/hw_random/ X: drivers/char/ipmi/ @@ -7091,6 +7099,7 @@ F: include/linux/component.h DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> R: "Rafael J. 
Wysocki" <rafael@kernel.org> +R: Danilo Krummrich <dakr@kernel.org> S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git F: Documentation/core-api/kobject.rst @@ -7101,8 +7110,14 @@ F: include/linux/debugfs.h F: include/linux/fwnode.h F: include/linux/kobj* F: include/linux/property.h +F: include/linux/sysfs.h F: lib/kobj* F: rust/kernel/device.rs +F: rust/kernel/device_id.rs +F: rust/kernel/devres.rs +F: rust/kernel/driver.rs +F: rust/kernel/platform.rs +F: samples/rust/rust_driver_platform.rs DRIVERS FOR OMAP ADAPTIVE VOLTAGE SCALING (AVS) M: Nishanth Menon <nm@ti.com> @@ -9626,6 +9641,13 @@ W: https://linuxtv.org T: git git://linuxtv.org/media.git F: drivers/media/radio/radio-gemtek* +GENDWARFKSYMS +M: Sami Tolvanen <samitolvanen@google.com> +L: linux-modules@vger.kernel.org +L: linux-kbuild@vger.kernel.org +S: Maintained +F: scripts/gendwarfksyms/ + GENERIC ARCHITECTURE TOPOLOGY M: Sudeep Holla <sudeep.holla@arm.com> L: linux-kernel@vger.kernel.org @@ -16263,6 +16285,7 @@ F: drivers/scsi/sun3_scsi_vme.c NCSI LIBRARY M: Samuel Mendoza-Jonas <sam@mendozajonas.com> +R: Paul Fertser <fercerpav@gmail.com> S: Maintained F: net/ncsi/ @@ -16597,6 +16620,7 @@ F: tools/testing/selftests/net/mptcp/ NETWORKING [TCP] M: Eric Dumazet <edumazet@google.com> +M: Neal Cardwell <ncardwell@google.com> L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/net_cachelines/tcp_sock.rst @@ -17632,6 +17656,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git F: Documentation/ABI/testing/sysfs-firmware-ofw F: drivers/of/ F: include/linux/of*.h +F: rust/kernel/of.rs F: scripts/dtc/ F: tools/testing/selftests/dt/ K: of_overlay_notifier_ @@ -18232,6 +18257,8 @@ F: include/asm-generic/pci* F: include/linux/of_pci.h F: include/linux/pci* F: include/uapi/linux/pci* +F: rust/kernel/pci.rs +F: samples/rust/rust_driver_pci.rs PCIE BANDWIDTH CONTROLLER M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> @@ -19830,6 +19857,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev F: Documentation/RCU/ F: include/linux/rcu* F: kernel/rcu/ +F: rust/kernel/sync/rcu.rs X: Documentation/RCU/torture.rst X: include/linux/srcu*.h X: kernel/rcu/srcu*.c @@ -22489,7 +22517,6 @@ F: drivers/phy/starfive/phy-jh7110-dphy-rx.c STARFIVE JH7110 DPHY TX DRIVER M: Keith Zhao <keith.zhao@starfivetech.com> -M: Shengyang Chen <shengyang.chen@starfivetech.com> S: Supported F: Documentation/devicetree/bindings/phy/starfive,jh7110-dphy-tx.yaml F: drivers/phy/starfive/phy-jh7110-dphy-tx.c diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 62da5827f471..f27e6b90428e 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -18,6 +18,7 @@ config ARC select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC select ARCH_32BIT_OFF_T select BUILDTIME_TABLE_SORT + select GENERIC_BUILTIN_DTB select CLONE_BACKWARDS select COMMON_CLK select DMA_DIRECT_REMAP @@ -550,11 +551,11 @@ config ARC_DBG_JUMP_LABEL part of static keys (jump labels) related code. endif -config ARC_BUILTIN_DTB_NAME +config BUILTIN_DTB_NAME string "Built in DTB" + default "nsim_700" help - Set the name of the DTB to embed in the vmlinux binary - Leaving it blank selects the "nsim_700" dtb. + Set the name of the DTB to embed in the vmlinux binary. 
endmenu # "ARC Architecture Configuration" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index fb98478ed1ab..0c5e6e6314f2 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -82,9 +82,6 @@ KBUILD_CFLAGS += $(cflags-y) KBUILD_AFLAGS += $(KBUILD_CFLAGS) KBUILD_LDFLAGS += $(ldflags-y) -# w/o this dtb won't embed into kernel binary -core-y += arch/arc/boot/dts/ - core-y += arch/arc/plat-sim/ core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/ core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/ diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile index 48704dfdf75c..ee5664f0640d 100644 --- a/arch/arc/boot/dts/Makefile +++ b/arch/arc/boot/dts/Makefile @@ -1,13 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -# Built-in dtb -builtindtb-y := nsim_700 -ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),) - builtindtb-y := $(CONFIG_ARC_BUILTIN_DTB_NAME) -endif - -obj-y += $(builtindtb-y).dtb.o -dtb-y := $(builtindtb-y).dtb +dtb-y := $(addsuffix .dtb, $(CONFIG_BUILTIN_DTB_NAME)) # for CONFIG_OF_ALL_DTBS test dtb- := $(patsubst $(src)/%.dts,%.dtb, $(wildcard $(src)/*.dts)) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 319bbe270322..a7cd526dd7ca 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -23,7 +23,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS101=y CONFIG_ARC_CACHE_LINE_SHIFT=5 -CONFIG_ARC_BUILTIN_DTB_NAME="axs101" +CONFIG_BUILTIN_DTB_NAME="axs101" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 8c1f1a111a17..afa6a348f444 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -22,7 +22,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y CONFIG_ISA_ARCV2=y -CONFIG_ARC_BUILTIN_DTB_NAME="axs103" +CONFIG_BUILTIN_DTB_NAME="axs103" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 75cab9f25b5b..2bfa6371953c 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -22,7 +22,7 @@ CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y -CONFIG_ARC_BUILTIN_DTB_NAME="axs103_idu" +CONFIG_BUILTIN_DTB_NAME="axs103_idu" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig index 8c3ed5d6e6c3..3a1577112078 100644 --- a/arch/arc/configs/haps_hs_defconfig +++ b/arch/arc/configs/haps_hs_defconfig @@ -14,7 +14,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set -CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs" +CONFIG_BUILTIN_DTB_NAME="haps_hs" CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_COMPACTION is not set diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig index 6fc98c1b9b36..a3cf940b1f5b 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -16,7 +16,7 @@ CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SMP=y -CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_idu" +CONFIG_BUILTIN_DTB_NAME="haps_hs_idu" CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 9e79154b5535..1558e8e87767 100644 --- 
a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -20,7 +20,7 @@ CONFIG_ISA_ARCV2=y CONFIG_SMP=y CONFIG_LINUX_LINK_BASE=0x90000000 CONFIG_LINUX_RAM_BASE=0x80000000 -CONFIG_ARC_BUILTIN_DTB_NAME="hsdk" +CONFIG_BUILTIN_DTB_NAME="hsdk" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 51092c39e360..f8b3235d9a65 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -17,7 +17,7 @@ CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_ISA_ARCOMPACT=y -CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700" +CONFIG_BUILTIN_DTB_NAME="nsim_700" CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 70c17bca4939..ee45dc0877fb 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -19,7 +19,7 @@ CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set -CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci" +CONFIG_BUILTIN_DTB_NAME="nsimosci" # CONFIG_COMPACTION is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index 59a3b6642fe7..e0a309970c20 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -19,7 +19,7 @@ CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ISA_ARCV2=y -CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs" +CONFIG_BUILTIN_DTB_NAME="nsimosci_hs" # CONFIG_COMPACTION is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index 1419fc946a08..88325b8b49cf 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -16,7 +16,7 @@ CONFIG_MODULES=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y # CONFIG_ARC_TIMERS_64BIT is not set -CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu" +CONFIG_BUILTIN_DTB_NAME="nsimosci_hs_idu" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index 5aba3d850fa2..865fbc19ef03 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -26,7 +26,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_ARC_PLAT_TB10X=y CONFIG_ARC_CACHE_LINE_SHIFT=5 CONFIG_HZ=250 -CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk" +CONFIG_BUILTIN_DTB_NAME="abilis_tb100_dvk" CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index 50c343913825..03d9ac20baa9 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -13,7 +13,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y CONFIG_ISA_ARCV2=y -CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38" +CONFIG_BUILTIN_DTB_NAME="vdk_hs38" CONFIG_PREEMPT=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index 6d9e1d9f71d2..c09488992f13 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -15,7 +15,7 @@ CONFIG_AXS103=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y # CONFIG_ARC_TIMERS_64BIT is not set -CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp" +CONFIG_BUILTIN_DTB_NAME="vdk_hs38_smp" CONFIG_PREEMPT=y 
CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arm/include/asm/ecard.h b/arch/arm/include/asm/ecard.h index 4befe8d2ae19..7cbe001bf9cc 100644 --- a/arch/arm/include/asm/ecard.h +++ b/arch/arm/include/asm/ecard.h @@ -195,7 +195,7 @@ void __iomem *ecardm_iomap(struct expansion_card *ec, unsigned int res, unsigned long offset, unsigned long maxsize); #define ecardm_iounmap(__ec, __addr) devm_iounmap(&(__ec)->dev, __addr) -extern struct bus_type ecard_bus_type; +extern const struct bus_type ecard_bus_type; #define ECARD_DEV(_d) container_of((_d), struct expansion_card, dev) diff --git a/arch/arm/kernel/isa.c b/arch/arm/kernel/isa.c index 905b1b191546..db8be609fab2 100644 --- a/arch/arm/kernel/isa.c +++ b/arch/arm/kernel/isa.c @@ -16,7 +16,7 @@ static unsigned int isa_membase, isa_portbase, isa_portshift; -static struct ctl_table ctl_isa_vars[] = { +static const struct ctl_table ctl_isa_vars[] = { { .procname = "membase", .data = &isa_membase, diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index 9f7454b8efa7..2cde4c83b7f9 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -1124,7 +1124,7 @@ static int ecard_match(struct device *_dev, const struct device_driver *_drv) return ret; } -struct bus_type ecard_bus_type = { +const struct bus_type ecard_bus_type = { .name = "ecard", .dev_groups = ecard_dev_groups, .match = ecard_match, diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 8c4c1a2186cc..2b601d88762d 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -562,7 +562,7 @@ static int vec_proc_do_default_vl(const struct ctl_table *table, int write, return 0; } -static struct ctl_table sve_default_vl_table[] = { +static const struct ctl_table sve_default_vl_table[] = { { .procname = "sve_default_vector_length", .mode = 0644, @@ -585,7 +585,7 @@ static int __init sve_sysctl_init(void) { return 0; } #endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ #if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL) -static struct ctl_table sme_default_vl_table[] = { +static const struct ctl_table sme_default_vl_table[] = { { .procname = "sme_default_vector_length", .mode = 0644, diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 2968a33bb3bc..42faebb7b712 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -859,7 +859,7 @@ long get_tagged_addr_ctrl(struct task_struct *task) * disable it for tasks that already opted in to the relaxed ABI. */ -static struct ctl_table tagged_addr_sysctl_table[] = { +static const struct ctl_table tagged_addr_sysctl_table[] = { { .procname = "tagged_addr_disabled", .mode = 0644, diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 30b56c67fa61..e527cd3ef128 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -97,7 +97,7 @@ void power4_idle(void) /* * Register the sysctl to set/clear powersave_nap. */ -static struct ctl_table powersave_nap_ctl_table[] = { +static const struct ctl_table powersave_nap_ctl_table[] = { { .procname = "powersave-nap", .data = &powersave_nap, diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 45dac7b46aa3..34a5aec4908f 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -369,6 +369,24 @@ static void dedotify_versions(struct modversion_info *vers, } } +/* Same as normal versions, remove a leading dot if present. 
*/ +static void dedotify_ext_version_names(char *str_seq, unsigned long size) +{ + unsigned long out = 0; + unsigned long in; + char last = '\0'; + + for (in = 0; in < size; in++) { + /* Skip one leading dot */ + if (last == '\0' && str_seq[in] == '.') + in++; + last = str_seq[in]; + str_seq[out++] = last; + } + /* Zero the trailing portion of the names table for robustness */ + memset(&str_seq[out], 0, size - out); +} + /* * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC. * seem to be defined (value set later). @@ -438,10 +456,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, me->arch.toc_section = i; if (sechdrs[i].sh_addralign < 8) sechdrs[i].sh_addralign = 8; - } - else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) + } else if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0) dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); + else if (strcmp(secstrings + sechdrs[i].sh_name, "__version_ext_names") == 0) + dedotify_ext_version_names((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size); if (sechdrs[i].sh_type == SHT_SYMTAB) dedotify((void *)hdr + sechdrs[i].sh_offset, diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 09bd93464b4f..9ec265fcaff4 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -815,7 +815,7 @@ static int opal_add_one_export(struct kobject *parent, const char *export_name, sysfs_bin_attr_init(attr); attr->attr.name = name; attr->attr.mode = 0400; - attr->read = sysfs_bin_attr_simple_read; + attr->read_new = sysfs_bin_attr_simple_read; attr->private = __va(vals[0]); attr->size = vals[1]; diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 1893f66371fa..b12ef382fec7 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -580,8 +580,10 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) switch(rets[0]) { case 0: - result = EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE; + result = EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED; break; case 1: result = EEH_STATE_RESET_ACTIVE | diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 29f1a0cc59cd..ae6f7a235d8b 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -2208,6 +2208,9 @@ static long spapr_tce_unset_window(struct iommu_table_group *table_group, int nu const char *win_name; int ret = -ENODEV; + if (!tbl) /* The table was never created OR window was never opened */ + return 0; + mutex_lock(&dma_win_init_mutex); if ((num == 0) && is_default_window_table(table_group, tbl)) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 1798f0f14d58..62bd8e2d5d4c 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -53,7 +53,7 @@ struct update_props_workarea { static unsigned int nmi_wd_lpm_factor = 200; #ifdef CONFIG_SYSCTL -static struct ctl_table nmi_wd_lpm_factor_ctl_table[] = { +static const struct ctl_table nmi_wd_lpm_factor_ctl_table[] = { { .procname = "nmi_wd_lpm_factor", .data = &nmi_wd_lpm_factor, diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 2acc7d876e1f..e318119d570d 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -119,4 
+119,15 @@ config ERRATA_THEAD_PMU If you don't know what to do here, say "Y". +config ERRATA_THEAD_GHOSTWRITE + bool "Apply T-Head Ghostwrite errata" + depends on ERRATA_THEAD && RISCV_ISA_XTHEADVECTOR + default y + help + The T-Head C9xx cores have a vulnerability in the xtheadvector + instruction set. When this errata is enabled, the CPUs will be probed + to determine if they are vulnerable and disable xtheadvector. + + If you don't know what to do here, say "Y". + endmenu # "CPU errata selection" diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor index 6f1cdd32ed29..b096548fe0ff 100644 --- a/arch/riscv/Kconfig.vendor +++ b/arch/riscv/Kconfig.vendor @@ -16,4 +16,30 @@ config RISCV_ISA_VENDOR_EXT_ANDES If you don't know what to do here, say Y. endmenu +menu "T-Head" +config RISCV_ISA_VENDOR_EXT_THEAD + bool "T-Head vendor extension support" + select RISCV_ISA_VENDOR_EXT + default y + help + Say N here to disable detection of and support for all T-Head vendor + extensions. Without this option enabled, T-Head vendor extensions will + not be detected at boot and their presence not reported to userspace. + + If you don't know what to do here, say Y. + +config RISCV_ISA_XTHEADVECTOR + bool "xtheadvector extension support" + depends on RISCV_ISA_VENDOR_EXT_THEAD + depends on RISCV_ISA_V + depends on FPU + default y + help + Say N here if you want to disable all xtheadvector related procedures + in the kernel. This will disable vector for any T-Head board that + contains xtheadvector rather than the standard vector. + + If you don't know what to do here, say Y. +endmenu + endmenu diff --git a/arch/riscv/Makefile.postlink b/arch/riscv/Makefile.postlink index 829b9abc91f6..6b0580949b6a 100644 --- a/arch/riscv/Makefile.postlink +++ b/arch/riscv/Makefile.postlink @@ -10,6 +10,7 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib quiet_cmd_relocs_check = CHKREL $@ cmd_relocs_check = \ @@ -19,11 +20,6 @@ ifdef CONFIG_RELOCATABLE quiet_cmd_cp_vmlinux_relocs = CPREL vmlinux.relocs cmd_cp_vmlinux_relocs = cp vmlinux vmlinux.relocs -quiet_cmd_relocs_strip = STRIPREL $@ -cmd_relocs_strip = $(OBJCOPY) --remove-section='.rel.*' \ - --remove-section='.rel__*' \ - --remove-section='.rela.*' \ - --remove-section='.rela__*' $@ endif # `@true` prevents complaint when there is nothing to be done @@ -33,7 +29,7 @@ vmlinux: FORCE ifdef CONFIG_RELOCATABLE $(call if_changed,relocs_check) $(call if_changed,cp_vmlinux_relocs) - $(call if_changed,relocs_strip) + $(call if_changed,strip_relocs) endif clean: diff --git a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi index 64c3c2e6cbe0..6367112e614a 100644 --- a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi +++ b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi @@ -27,7 +27,8 @@ riscv,isa = "rv64imafdc"; riscv,isa-base = "rv64i"; riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "zicntr", "zicsr", - "zifencei", "zihpm"; + "zifencei", "zihpm", "xtheadvector"; + thead,vlenb = <128>; #cooling-cells = <2>; cpu0_intc: interrupt-controller { diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index a924ef116d5e..0f7dcbe3c45b 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -10,7 +10,6 @@ CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y diff --git 
a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index e24770a77932..0b942183f708 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -10,6 +10,7 @@ #include <linux/string.h> #include <linux/uaccess.h> #include <asm/alternative.h> +#include <asm/bugs.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/dma-noncoherent.h> @@ -142,6 +143,31 @@ static bool errata_probe_pmu(unsigned int stage, return true; } +static bool errata_probe_ghostwrite(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_GHOSTWRITE)) + return false; + + /* + * target-c9xx cores report arch_id and impid as 0 + * + * While ghostwrite may not affect all c9xx cores that implement + * xtheadvector, there is no further granularity than c9xx. Assume + * vulnerable for this entire class of processors when xtheadvector is + * enabled. + */ + if (arch_id != 0 || impid != 0) + return false; + + if (stage != RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + ghostwrite_set_vulnerable(); + + return true; +} + static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid) { @@ -155,6 +181,8 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_pmu(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PMU); + errata_probe_ghostwrite(stage, archid, impid); + return cpu_req_errata; } diff --git a/arch/riscv/include/asm/bugs.h b/arch/riscv/include/asm/bugs.h new file mode 100644 index 000000000000..17ca0a947730 --- /dev/null +++ b/arch/riscv/include/asm/bugs.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Interface for managing mitigations for riscv vulnerabilities. + * + * Copyright (C) 2024 Rivos Inc. + */ + +#ifndef __ASM_BUGS_H +#define __ASM_BUGS_H + +/* Watch out, ordering is important here. */ +enum mitigation_state { + UNAFFECTED, + MITIGATED, + VULNERABLE, +}; + +void ghostwrite_set_vulnerable(void); +bool ghostwrite_enable_mitigation(void); +enum mitigation_state ghostwrite_get_state(void); + +#endif /* __ASM_BUGS_H */ diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 4bd054c54c21..569140d6e639 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -34,6 +34,8 @@ DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); /* Per-cpu ISA extensions.
*/ extern struct riscv_isainfo hart_isa[NR_CPUS]; +extern u32 thead_vlenb_of; + void __init riscv_user_isa_enable(void); #define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 37bdea65bbd8..6fed42e37705 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -30,6 +30,12 @@ #define SR_VS_CLEAN _AC(0x00000400, UL) #define SR_VS_DIRTY _AC(0x00000600, UL) +#define SR_VS_THEAD _AC(0x01800000, UL) /* xtheadvector Status */ +#define SR_VS_OFF_THEAD _AC(0x00000000, UL) +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL) +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL) +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL) + #define SR_XS _AC(0x00018000, UL) /* Extension Status */ #define SR_XS_OFF _AC(0x00000000, UL) #define SR_XS_INITIAL _AC(0x00008000, UL) @@ -315,6 +321,15 @@ #define CSR_STIMECMP 0x14D #define CSR_STIMECMPH 0x15D +/* xtheadvector symbolic CSR names */ +#define CSR_VXSAT 0x9 +#define CSR_VXRM 0xa + +/* xtheadvector CSR masks */ +#define CSR_VXRM_MASK 3 +#define CSR_VXRM_SHIFT 1 +#define CSR_VXSAT_MASK 1 + /* Supervisor-Level Window to Indirectly Accessed Registers (AIA) */ #define CSR_SISELECT 0x150 #define CSR_SIREG 0x151 diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 7c8a71a526a3..6e426ed7919a 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -25,7 +25,8 @@ #ifdef CONFIG_ERRATA_THEAD #define ERRATA_THEAD_MAE 0 #define ERRATA_THEAD_PMU 1 -#define ERRATA_THEAD_NUMBER 2 +#define ERRATA_THEAD_GHOSTWRITE 2 +#define ERRATA_THEAD_NUMBER 3 #endif #ifdef __ASSEMBLY__ diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h index fc8130f995c1..72be100afa23 100644 --- a/arch/riscv/include/asm/futex.h +++ b/arch/riscv/include/asm/futex.h @@ -85,7 +85,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __enable_user_access(); __asm__ __volatile__ ( - "1: lr.w.aqrl %[v],%[u] \n" + "1: lr.w %[v],%[u] \n" " bne %[v],%z[ov],3f \n" "2: sc.w.aqrl %[t],%z[nv],%[u] \n" " bnez %[t],1b \n" diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 1ce1df6d0ff3..dd624523981c 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * Copyright 2023 Rivos, Inc + * Copyright 2023-2024 Rivos, Inc */ #ifndef _ASM_HWPROBE_H @@ -8,7 +8,7 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 10 +#define RISCV_HWPROBE_MAX_KEY 11 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { @@ -21,6 +21,7 @@ static inline bool hwprobe_key_is_bitmask(__s64 key) case RISCV_HWPROBE_KEY_BASE_BEHAVIOR: case RISCV_HWPROBE_KEY_IMA_EXT_0: case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: return true; } diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 94e33216b2d9..0e71eb82f920 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -117,7 +117,7 @@ do { \ __set_prev_cpu(__prev->thread); \ if (has_fpu()) \ __switch_to_fpu(__prev, __next); \ - if (has_vector()) \ + if (has_vector() || has_xtheadvector()) \ __switch_to_vector(__prev, __next); \ if (switch_to_should_flush_icache(__next)) \ local_flush_icache_all(); \ diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index c7c023afbacd..e8a83f55be2b 
100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -18,6 +18,27 @@ #include <asm/cpufeature.h> #include <asm/csr.h> #include <asm/asm.h> +#include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> + +#define __riscv_v_vstate_or(_val, TYPE) ({ \ + typeof(_val) _res = _val; \ + if (has_xtheadvector()) \ + _res = (_res & ~SR_VS_THEAD) | SR_VS_##TYPE##_THEAD; \ + else \ + _res = (_res & ~SR_VS) | SR_VS_##TYPE; \ + _res; \ +}) + +#define __riscv_v_vstate_check(_val, TYPE) ({ \ + bool _res; \ + if (has_xtheadvector()) \ + _res = ((_val) & SR_VS_THEAD) == SR_VS_##TYPE##_THEAD; \ + else \ + _res = ((_val) & SR_VS) == SR_VS_##TYPE; \ + _res; \ +}) extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); @@ -41,39 +62,62 @@ static __always_inline bool has_vector(void) return riscv_has_extension_unlikely(RISCV_ISA_EXT_ZVE32X); } +static __always_inline bool has_xtheadvector_no_alternatives(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR); + else + return false; +} + +static __always_inline bool has_xtheadvector(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return riscv_has_vendor_extension_unlikely(THEAD_VENDOR_ID, + RISCV_ISA_VENDOR_EXT_XTHEADVECTOR); + else + return false; +} + static inline void __riscv_v_vstate_clean(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN; + regs->status = __riscv_v_vstate_or(regs->status, CLEAN); } static inline void __riscv_v_vstate_dirty(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY; + regs->status = __riscv_v_vstate_or(regs->status, DIRTY); } static inline void riscv_v_vstate_off(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_OFF; + regs->status = __riscv_v_vstate_or(regs->status, OFF); } static inline void riscv_v_vstate_on(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_INITIAL; + regs->status = __riscv_v_vstate_or(regs->status, INITIAL); } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { - return (regs->status & SR_VS) != 0; + return !__riscv_v_vstate_check(regs->status, OFF); } static __always_inline void riscv_v_enable(void) { - csr_set(CSR_SSTATUS, SR_VS); + if (has_xtheadvector()) + csr_set(CSR_SSTATUS, SR_VS_THEAD); + else + csr_set(CSR_SSTATUS, SR_VS); } static __always_inline void riscv_v_disable(void) { - csr_clear(CSR_SSTATUS, SR_VS); + if (has_xtheadvector()) + csr_clear(CSR_SSTATUS, SR_VS_THEAD); + else + csr_clear(CSR_SSTATUS, SR_VS); } static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) @@ -82,10 +126,36 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) "csrr %0, " __stringify(CSR_VSTART) "\n\t" "csrr %1, " __stringify(CSR_VTYPE) "\n\t" "csrr %2, " __stringify(CSR_VL) "\n\t" - "csrr %3, " __stringify(CSR_VCSR) "\n\t" - "csrr %4, " __stringify(CSR_VLENB) "\n\t" : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), - "=r" (dest->vcsr), "=r" (dest->vlenb) : :); + "=r" (dest->vcsr) : :); + + if (has_xtheadvector()) { + unsigned long status; + + /* + * CSR_VCSR is defined as + * [2:1] - vxrm[1:0] + * [0] - vxsat + * The earlier vector spec implemented by T-Head uses separate + * registers for the same bit-elements, so just combine those + * into the existing output field. 
+ * + * Additionally T-Head cores need FS to be enabled when accessing + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions. + * Though the cores do not implement the VXRM and VXSAT fields in the + * FCSR CSR that vector-0.7.1 specifies. + */ + status = csr_read_set(CSR_STATUS, SR_FS_DIRTY); + dest->vcsr = csr_read(CSR_VXSAT) | csr_read(CSR_VXRM) << CSR_VXRM_SHIFT; + + dest->vlenb = riscv_v_vsize / 32; + + if ((status & SR_FS) != SR_FS_DIRTY) + csr_write(CSR_STATUS, status); + } else { + dest->vcsr = csr_read(CSR_VCSR); + dest->vlenb = csr_read(CSR_VLENB); + } } static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) @@ -96,9 +166,25 @@ static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src "vsetvl x0, %2, %1\n\t" ".option pop\n\t" "csrw " __stringify(CSR_VSTART) ", %0\n\t" - "csrw " __stringify(CSR_VCSR) ", %3\n\t" - : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), - "r" (src->vcsr) :); + : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl)); + + if (has_xtheadvector()) { + unsigned long status = csr_read(CSR_SSTATUS); + + /* + * Similar to __vstate_csr_save above, restore values for the + * separate VXRM and VXSAT CSRs from the vcsr variable. + */ + status = csr_read_set(CSR_STATUS, SR_FS_DIRTY); + + csr_write(CSR_VXRM, (src->vcsr >> CSR_VXRM_SHIFT) & CSR_VXRM_MASK); + csr_write(CSR_VXSAT, src->vcsr & CSR_VXSAT_MASK); + + if ((status & SR_FS) != SR_FS_DIRTY) + csr_write(CSR_STATUS, status); + } else { + csr_write(CSR_VCSR, src->vcsr); + } } static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, @@ -108,19 +194,33 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, riscv_v_enable(); __vstate_csr_save(save_to); - asm volatile ( - ".option push\n\t" - ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vse8.v v0, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + if (has_xtheadvector()) { + asm volatile ( + "mv t0, %0\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + : : "r" (datap) : "memory", "t0", "t4"); + } else { + asm volatile ( + ".option push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vse8.v v0, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (datap) : "memory"); + } riscv_v_disable(); } @@ -130,19 +230,33 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ unsigned long vl; riscv_v_enable(); - asm volatile ( - ".option push\n\t" - ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vle8.v v0, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + if (has_xtheadvector()) { + asm volatile ( + "mv t0, %0\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + : : "r" (datap) : "memory", "t0", "t4"); + } else { + asm volatile ( + ".option 
push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vle8.v v0, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (datap) : "memory"); + } __vstate_csr_restore(restore_from); riscv_v_disable(); } @@ -152,33 +266,41 @@ static inline void __riscv_v_vstate_discard(void) unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1); riscv_v_enable(); + if (has_xtheadvector()) + asm volatile (THEAD_VSETVLI_T4X0E8M8D1 : : : "t4"); + else + asm volatile ( + ".option push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + ".option pop\n\t": "=&r" (vl)); + asm volatile ( ".option push\n\t" ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" "vmv.v.i v0, -1\n\t" "vmv.v.i v8, -1\n\t" "vmv.v.i v16, -1\n\t" "vmv.v.i v24, -1\n\t" "vsetvl %0, x0, %1\n\t" ".option pop\n\t" - : "=&r" (vl) : "r" (vtype_inval) : "memory"); + : "=&r" (vl) : "r" (vtype_inval)); + riscv_v_disable(); } static inline void riscv_v_vstate_discard(struct pt_regs *regs) { - if ((regs->status & SR_VS) == SR_VS_OFF) - return; - - __riscv_v_vstate_discard(); - __riscv_v_vstate_dirty(regs); + if (riscv_v_vstate_query(regs)) { + __riscv_v_vstate_discard(); + __riscv_v_vstate_dirty(regs); + } } static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { - if ((regs->status & SR_VS) == SR_VS_DIRTY) { + if (__riscv_v_vstate_check(regs->status, DIRTY)) { __riscv_v_vstate_save(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } @@ -187,7 +309,7 @@ static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { - if ((regs->status & SR_VS) != SR_VS_OFF) { + if (riscv_v_vstate_query(regs)) { __riscv_v_vstate_restore(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } @@ -196,7 +318,7 @@ static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, static inline void riscv_v_vstate_set_restore(struct task_struct *task, struct pt_regs *regs) { - if ((regs->status & SR_VS) != SR_VS_OFF) { + if (riscv_v_vstate_query(regs)) { set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE); riscv_v_vstate_on(regs); } @@ -270,6 +392,8 @@ struct pt_regs; static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } static __always_inline bool insn_is_vector(u32 insn_buf) { return false; } +static __always_inline bool has_xtheadvector_no_alternatives(void) { return false; } +static __always_inline bool has_xtheadvector(void) { return false; } static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } diff --git a/arch/riscv/include/asm/vendor_extensions/thead.h b/arch/riscv/include/asm/vendor_extensions/thead.h new file mode 100644 index 000000000000..e85c75b3b340 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/thead.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_H + +#include <asm/vendor_extensions.h> + +#include <linux/types.h> + +/* + * Extension keys must be strictly less than RISCV_ISA_VENDOR_EXT_MAX. 
+ */ +#define RISCV_ISA_VENDOR_EXT_XTHEADVECTOR 0 + +extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead; + +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD +void disable_xtheadvector(void); +#else +static inline void disable_xtheadvector(void) { } +#endif + +/* Extension specific helpers */ + +/* + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for + * vsetvli t4, x0, e8, m8, d1 + */ +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" + +/* + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize + * the call resulting in a different encoding and then using a value for + * the "mop" field that is not part of vector-0.7.1 + * So encode specific variants for vstate_save and _restore. + */ +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" + +#endif diff --git a/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h new file mode 100644 index 000000000000..65a9c5612466 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_HWPROBE_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_HWPROBE_H + +#include <linux/cpumask.h> + +#include <uapi/asm/hwprobe.h> + +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD +void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus); +#else +static inline void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + pair->value = 0; +} +#endif + +#endif diff --git a/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h new file mode 100644 index 000000000000..6b9293e984a9 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2024 Rivos, Inc + */ + +#ifndef _ASM_RISCV_SYS_HWPROBE_H +#define _ASM_RISCV_SYS_HWPROBE_H + +#include <asm/cpufeature.h> + +#define VENDOR_EXT_KEY(ext) \ + do { \ + if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_VENDOR_EXT_##ext)) \ + pair->value |= RISCV_HWPROBE_VENDOR_EXT_##ext; \ + else \ + missing |= RISCV_HWPROBE_VENDOR_EXT_##ext; \ + } while (false) + +/* + * Loop through and record extensions that 1) anyone has, and 2) anyone + * doesn't have. + * + * _extension_checks is an arbitrary C block to set the values of pair->value + * and missing. It should be filled with VENDOR_EXT_KEY expressions. 
+ */ +#define VENDOR_EXTENSION_SUPPORTED(pair, cpus, per_hart_vendor_bitmap, _extension_checks) \ + do { \ + int cpu; \ + u64 missing = 0; \ + for_each_cpu(cpu, (cpus)) { \ + struct riscv_isavendorinfo *isainfo = &(per_hart_vendor_bitmap)[cpu]; \ + _extension_checks \ + } \ + (pair)->value &= ~missing; \ + } while (false) \ + +#endif /* _ASM_RISCV_SYS_HWPROBE_H */ diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 3af142b99f77..c3c1cc951cb9 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * Copyright 2023 Rivos, Inc + * Copyright 2023-2024 Rivos, Inc */ #ifndef _UAPI_ASM_HWPROBE_H @@ -94,6 +94,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW 2 #define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3 #define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4 +#define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0 11 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/include/uapi/asm/vendor/thead.h b/arch/riscv/include/uapi/asm/vendor/thead.h new file mode 100644 index 000000000000..43790ebe5faf --- /dev/null +++ b/arch/riscv/include/uapi/asm/vendor/thead.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#define RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR (1 << 0) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 063d1faf5a53..8d186bfced45 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -123,3 +123,5 @@ obj-$(CONFIG_COMPAT) += compat_vdso/ obj-$(CONFIG_64BIT) += pi/ obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o + +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += bugs.o diff --git a/arch/riscv/kernel/bugs.c b/arch/riscv/kernel/bugs.c new file mode 100644 index 000000000000..3655fe7d678c --- /dev/null +++ b/arch/riscv/kernel/bugs.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Rivos Inc. + */ + +#include <linux/cpu.h> +#include <linux/device.h> +#include <linux/sprintf.h> + +#include <asm/bugs.h> +#include <asm/vendor_extensions/thead.h> + +static enum mitigation_state ghostwrite_state; + +void ghostwrite_set_vulnerable(void) +{ + ghostwrite_state = VULNERABLE; +} + +/* + * Vendor extension alternatives will use the value set at the time of boot + * alternative patching, thus this must be called before boot alternatives are + * patched (and after extension probing) to be effective. + * + * Returns true if mitigated, false otherwise.
+ */ +bool ghostwrite_enable_mitigation(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR) && + ghostwrite_state == VULNERABLE && !cpu_mitigations_off()) { + disable_xtheadvector(); + ghostwrite_state = MITIGATED; + return true; + } + + return false; +} + +enum mitigation_state ghostwrite_get_state(void) +{ + return ghostwrite_state; +} + +ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) { + switch (ghostwrite_state) { + case UNAFFECTED: + return sprintf(buf, "Not affected\n"); + case MITIGATED: + return sprintf(buf, "Mitigation: xtheadvector disabled\n"); + case VULNERABLE: + fallthrough; + default: + return sprintf(buf, "Vulnerable\n"); + } + } else { + return sprintf(buf, "Not affected\n"); + } +} diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c0916ed318c2..c6ba750536c3 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -17,6 +17,7 @@ #include <linux/of.h> #include <asm/acpi.h> #include <asm/alternative.h> +#include <asm/bugs.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwcap.h> @@ -26,6 +27,7 @@ #include <asm/sbi.h> #include <asm/vector.h> #include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> #define NUM_ALPHA_EXTS ('z' - 'a' + 1) @@ -39,6 +41,8 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; /* Per-cpu ISA extensions. */ struct riscv_isainfo hart_isa[NR_CPUS]; +u32 thead_vlenb_of; + /** * riscv_isa_extension_base() - Get base extension word * @@ -791,9 +795,50 @@ static void __init riscv_fill_vendor_ext_list(int cpu) } } +static int has_thead_homogeneous_vlenb(void) +{ + int cpu; + u32 prev_vlenb = 0; + u32 vlenb; + + /* Ignore thead,vlenb property if xtheadvector is not enabled in the kernel */ + if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return 0; + + for_each_possible_cpu(cpu) { + struct device_node *cpu_node; + + cpu_node = of_cpu_device_node_get(cpu); + if (!cpu_node) { + pr_warn("Unable to find cpu node\n"); + return -ENOENT; + } + + if (of_property_read_u32(cpu_node, "thead,vlenb", &vlenb)) { + of_node_put(cpu_node); + + if (prev_vlenb) + return -ENOENT; + continue; + } + + if (prev_vlenb && vlenb != prev_vlenb) { + of_node_put(cpu_node); + return -ENOENT; + } + + prev_vlenb = vlenb; + of_node_put(cpu_node); + } + + thead_vlenb_of = vlenb; + return 0; +} + static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) { unsigned int cpu; + bool mitigated; for_each_possible_cpu(cpu) { unsigned long this_hwcap = 0; @@ -844,6 +889,17 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) riscv_fill_vendor_ext_list(cpu); } + /* + * Execute ghostwrite mitigation immediately after detecting extensions + * to disable xtheadvector if necessary.
+ */ + mitigated = ghostwrite_enable_mitigation(); + + if (!mitigated && has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) { + pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n"); + disable_xtheadvector(); + } + if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) return -ENOENT; @@ -896,7 +952,8 @@ void __init riscv_fill_hwcap(void) elf_hwcap &= ~COMPAT_HWCAP_ISA_F; } - if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X)) { + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) || + has_xtheadvector_no_alternatives()) { /* * This cannot fail when called on the boot hart */ diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c index 6afe80c7f03a..99972a48e86b 100644 --- a/arch/riscv/kernel/kernel_mode_vector.c +++ b/arch/riscv/kernel/kernel_mode_vector.c @@ -143,7 +143,7 @@ static int riscv_v_start_kernel_context(bool *is_nested) /* Transfer the ownership of V from user to kernel, then save */ riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY); - if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) { + if (__riscv_v_vstate_check(task_pt_regs(current)->status, DIRTY)) { uvstate = ¤t->thread.vstate; __riscv_v_vstate_save(uvstate, uvstate->datap); } @@ -160,7 +160,7 @@ asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs) return; depth = riscv_v_ctx_get_depth(); - if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY) + if (depth == 0 && __riscv_v_vstate_check(regs->status, DIRTY)) riscv_preempt_v_set_dirty(); riscv_v_ctx_depth_inc(); @@ -208,7 +208,7 @@ void kernel_vector_begin(void) { bool nested = false; - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; BUG_ON(!may_use_simd()); @@ -236,7 +236,7 @@ EXPORT_SYMBOL_GPL(kernel_vector_begin); */ void kernel_vector_end(void) { - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; riscv_v_disable(); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 58b6482c2bf6..7c244de77180 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -190,7 +190,7 @@ void flush_thread(void) void arch_release_task_struct(struct task_struct *tsk) { /* Free the vector context of datap. */ - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_free(tsk); } @@ -240,7 +240,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.s[0] = 0; } p->thread.riscv_v_flags = 0; - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_alloc(p); p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ @@ -364,7 +364,7 @@ static bool try_to_set_pmm(unsigned long value) * disable it for tasks that already opted in to the relaxed ABI. 
*/ -static struct ctl_table tagged_addr_sysctl_table[] = { +static const struct ctl_table tagged_addr_sysctl_table[] = { { .procname = "tagged_addr_disabled", .mode = 0644, diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index dcd282419456..94e905eea1de 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -189,7 +189,7 @@ static long restore_sigcontext(struct pt_regs *regs, return 0; case RISCV_V_MAGIC: - if (!has_vector() || !riscv_v_vstate_query(regs) || + if (!(has_vector() || has_xtheadvector()) || !riscv_v_vstate_query(regs) || size != riscv_v_sc_size) return -EINVAL; @@ -211,7 +211,7 @@ static size_t get_rt_frame_size(bool cal_all) frame_size = sizeof(*frame); - if (has_vector()) { + if (has_vector() || has_xtheadvector()) { if (cal_all || riscv_v_vstate_query(task_pt_regs(current))) total_context_size += riscv_v_sc_size; } @@ -284,7 +284,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, if (has_fpu()) err |= save_fp_state(regs, &sc->sc_fpregs); /* Save the vector state. */ - if (has_vector() && riscv_v_vstate_query(regs)) + if ((has_vector() || has_xtheadvector()) && riscv_v_vstate_query(regs)) err |= save_v_state(regs, (void __user **)&sc_ext_ptr); /* Write zero to fp-reserved space and check it on restore_sigcontext */ err |= __put_user(0, &sc->sc_extdesc.reserved); diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index cb93adfffc48..bcd3b816306c 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -15,6 +15,7 @@ #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/vector.h> +#include <asm/vendor_extensions/thead_hwprobe.h> #include <vdso/vsyscall.h> @@ -286,6 +287,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = riscv_timebase; break; + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: + hwprobe_isa_vendor_ext_thead_0(pair, cpus); + break; + /* * For forward compatibility, unknown keys don't fail the whole * call, but get their element key set to -1 and value set to 0 diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 821818886fab..184f780c932d 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -33,7 +33,17 @@ int riscv_v_setup_vsize(void) { unsigned long this_vsize; - /* There are 32 vector registers with vlenb length. */ + /* + * There are 32 vector registers with vlenb length. + * + * If the thead,vlenb property was provided by the firmware, use that + * instead of probing the CSRs. 
+ */ + if (thead_vlenb_of) { + riscv_v_vsize = thead_vlenb_of * 32; + return 0; + } + riscv_v_enable(); this_vsize = csr_read(CSR_VLENB) * 32; riscv_v_disable(); @@ -53,7 +63,7 @@ int riscv_v_setup_vsize(void) void __init riscv_v_setup_ctx_cache(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx", @@ -173,7 +183,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) u32 __user *epc = (u32 __user *)regs->epc; u32 insn = (u32)regs->badaddr; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return false; /* Do not handle if V is not supported, or disabled */ @@ -216,7 +226,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; next = riscv_v_ctrl_get_next(tsk); @@ -238,7 +248,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) long riscv_v_vstate_ctrl_get_current(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK; @@ -249,7 +259,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK) @@ -287,7 +297,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) #ifdef CONFIG_SYSCTL -static struct ctl_table riscv_v_default_vstate_table[] = { +static const struct ctl_table riscv_v_default_vstate_table[] = { { .procname = "riscv_v_default_allow", .data = &riscv_v_implicit_uacc, @@ -299,7 +309,7 @@ static struct ctl_table riscv_v_default_vstate_table[] = { static int __init riscv_v_sysctl_init(void) { - if (has_vector()) + if (has_vector() || has_xtheadvector()) if (!register_sysctl("abi", riscv_v_default_vstate_table)) return -EINVAL; return 0; @@ -309,7 +319,7 @@ static int __init riscv_v_sysctl_init(void) static int __init riscv_v_sysctl_init(void) { return 0; } #endif /* ! 
CONFIG_SYSCTL */ -static int riscv_v_init(void) +static int __init riscv_v_init(void) { return riscv_v_sysctl_init(); } diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index a8126d118341..a31ff84740eb 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -6,6 +6,7 @@ #include <asm/vendorid_list.h> #include <asm/vendor_extensions.h> #include <asm/vendor_extensions/andes.h> +#include <asm/vendor_extensions/thead.h> #include <linux/array_size.h> #include <linux/types.h> @@ -14,6 +15,9 @@ struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES &riscv_isa_vendor_ext_list_andes, #endif +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + &riscv_isa_vendor_ext_list_thead, +#endif }; const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list); @@ -41,6 +45,12 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; break; #endif + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + case THEAD_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap; + break; + #endif default: return false; } diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile index 6a61aed944f1..866414c81a9f 100644 --- a/arch/riscv/kernel/vendor_extensions/Makefile +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o diff --git a/arch/riscv/kernel/vendor_extensions/thead.c b/arch/riscv/kernel/vendor_extensions/thead.c new file mode 100644 index 000000000000..519dbf70710a --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> + +#include <linux/array_size.h> +#include <linux/cpumask.h> +#include <linux/types.h> + +/* All T-Head vendor extensions supported in Linux */ +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_thead[] = { + __RISCV_ISA_EXT_DATA(xtheadvector, RISCV_ISA_VENDOR_EXT_XTHEADVECTOR), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_thead), + .ext_data = riscv_isa_vendor_ext_thead, +}; + +void disable_xtheadvector(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap[cpu].isa); + + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap.isa); +} diff --git a/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c new file mode 100644 index 000000000000..2eba34011786 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/vendor_extensions/thead.h> +#include <asm/vendor_extensions/thead_hwprobe.h> +#include <asm/vendor_extensions/vendor_hwprobe.h> + +#include <linux/cpumask.h> +#include <linux/types.h> + +#include <uapi/asm/hwprobe.h> +#include <uapi/asm/vendor/thead.h> + +void 
hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus) +{ + VENDOR_EXTENSION_SUPPORTED(pair, cpus, + riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap, { + VENDOR_EXT_KEY(XTHEADVECTOR); + }); +} diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index a9f2b4af8f3f..0194324a0c50 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -22,6 +22,57 @@ #include "../kernel/head.h" +static void show_pte(unsigned long addr) +{ + pgd_t *pgdp, pgd; + p4d_t *p4dp, p4d; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep, pte; + struct mm_struct *mm = current->mm; + + if (!mm) + mm = &init_mm; + + pr_alert("Current %s pgtable: %luK pagesize, %d-bit VAs, pgdp=0x%016llx\n", + current->comm, PAGE_SIZE / SZ_1K, VA_BITS, + mm == &init_mm ? (u64)__pa_symbol(mm->pgd) : virt_to_phys(mm->pgd)); + + pgdp = pgd_offset(mm, addr); + pgd = pgdp_get(pgdp); + pr_alert("[%016lx] pgd=%016lx", addr, pgd_val(pgd)); + if (pgd_none(pgd) || pgd_bad(pgd) || pgd_leaf(pgd)) + goto out; + + p4dp = p4d_offset(pgdp, addr); + p4d = p4dp_get(p4dp); + pr_cont(", p4d=%016lx", p4d_val(p4d)); + if (p4d_none(p4d) || p4d_bad(p4d) || p4d_leaf(p4d)) + goto out; + + pudp = pud_offset(p4dp, addr); + pud = pudp_get(pudp); + pr_cont(", pud=%016lx", pud_val(pud)); + if (pud_none(pud) || pud_bad(pud) || pud_leaf(pud)) + goto out; + + pmdp = pmd_offset(pudp, addr); + pmd = pmdp_get(pmdp); + pr_cont(", pmd=%016lx", pmd_val(pmd)); + if (pmd_none(pmd) || pmd_bad(pmd) || pmd_leaf(pmd)) + goto out; + + ptep = pte_offset_map(pmdp, addr); + if (!ptep) + goto out; + + pte = ptep_get(ptep); + pr_cont(", pte=%016lx", pte_val(pte)); + pte_unmap(ptep); +out: + pr_cont("\n"); +} + static void die_kernel_fault(const char *msg, unsigned long addr, struct pt_regs *regs) { @@ -31,6 +82,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr, addr); bust_spinlocks(0); + show_pte(addr); die(regs, "Oops"); make_task_dead(SIGKILL); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 722178ae3488..15b2eda4c364 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -268,8 +268,12 @@ static void __init setup_bootmem(void) */ if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) { max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE; - memblock_cap_memory_range(phys_ram_base, - max_mapped_addr - phys_ram_base); + if (memblock_end_of_DRAM() > max_mapped_addr) { + memblock_cap_memory_range(phys_ram_base, + max_mapped_addr - phys_ram_base); + pr_warn("Physical memory overflows the linear mapping size: region above %pa removed", + &max_mapped_addr); + } } /* diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6e9545d8b0c7..9c9ec08d78c7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -52,13 +52,19 @@ config KASAN_SHADOW_OFFSET depends on KASAN default 0x1C000000000000 -config GCC_ASM_FLAG_OUTPUT_BROKEN +config CC_ASM_FLAG_OUTPUT_BROKEN def_bool CC_IS_GCC && GCC_VERSION < 140200 help GCC versions before 14.2.0 may die with an internal compiler error in some configurations if flag output operands are used within inline assemblies. +config CC_HAS_ASM_AOR_FORMAT_FLAGS + def_bool !(CC_IS_CLANG && CLANG_VERSION < 190100) + help + Clang versions before 19.1.0 do not support A, + O, and R inline assembly format flags. 
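For illustration of what this new symbol gates: with a compiler that supports them, the %O and %R operand-format flags split a single "Q"-constrained memory operand into its displacement and base register, so a hand-written mnemonic can address memory without first materializing the address with "la". A minimal sketch of the idiom in C with s390 inline assembly (load_word() is a hypothetical helper, not part of this diff):

	static inline unsigned int load_word(const unsigned int *p)
	{
		unsigned int val;

		/* %O[p] expands to the displacement and %R[p] to the base register of *p */
		asm volatile("l %[val],%O[p](%R[p])\n"
			     : [val] "=d" (val)
			     : [p] "Q" (*p));
		return val;
	}

Compilers without these flags (Clang before 19.1.0, per the help text above) must load the operand address into a scratch register first, which is exactly the shape of the fallback fpu_vl()/fpu_vll()/fpu_vst() variants kept further down in this diff.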
+ config S390 def_bool y # @@ -72,6 +78,7 @@ config S390 select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 3f25498dac65..5fae311203c2 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -22,7 +22,7 @@ KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) endif -KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink index df82f5410769..1ae5478cd6ac 100644 --- a/arch/s390/Makefile.postlink +++ b/arch/s390/Makefile.postlink @@ -11,6 +11,7 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib CMD_RELOCS=arch/s390/tools/relocs OUT_RELOCS = arch/s390/boot @@ -19,11 +20,6 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/relocs.S mkdir -p $(OUT_RELOCS); \ $(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S -quiet_cmd_strip_relocs = RSTRIP $@ - cmd_strip_relocs = \ - $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ - --remove-section='.rela.*' --remove-section='.rela__*' $@ - vmlinux: FORCE $(call cmd,relocs) $(call cmd,strip_relocs) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 91a30e017d65..dd7ba7587dd5 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -52,7 +52,7 @@ static int appldata_interval_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table_header *appldata_sysctl_header; -static struct ctl_table appldata_table[] = { +static const struct ctl_table appldata_table[] = { { .procname = "timer", .mode = S_IRUGO | S_IWUSR, diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index 11e0c3d5dbc8..79afb5fa7f1f 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -46,7 +46,7 @@ void print_missing_facilities(void) * z/VM adds a four character prefix. 
*/ if (strlen(als_str) > 70) { - boot_printk("%s\n", als_str); + boot_emerg("%s\n", als_str); *als_str = '\0'; } u16_to_decimal(val_str, i * BITS_PER_LONG + j); @@ -54,7 +54,7 @@ void print_missing_facilities(void) first = 0; } } - boot_printk("%s\n", als_str); + boot_emerg("%s\n", als_str); } static void facility_mismatch(void) @@ -62,10 +62,10 @@ static void facility_mismatch(void) struct cpuid id; get_cpu_id(&id); - boot_printk("The Linux kernel requires more recent processor hardware\n"); - boot_printk("Detected machine-type number: %4x\n", id.machine); + boot_emerg("The Linux kernel requires more recent processor hardware\n"); + boot_emerg("Detected machine-type number: %4x\n", id.machine); print_missing_facilities(); - boot_printk("See Principles of Operations for facility bits\n"); + boot_emerg("See Principles of Operations for facility bits\n"); disabled_wait(); } diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 56244fe78182..69f261566a64 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -8,6 +8,7 @@ #ifndef __ASSEMBLY__ +#include <linux/printk.h> #include <asm/physmem_info.h> struct machine_info { @@ -47,13 +48,16 @@ void physmem_set_usable_limit(unsigned long limit); void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size); void physmem_free(enum reserved_range_type type); /* for continuous/multiple allocations per type */ -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align); +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align); +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom); /* for single allocations, 1 per type */ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, unsigned long align, unsigned long min, unsigned long max, bool die_on_oom); unsigned long get_physmem_alloc_pos(void); +void dump_physmem_reserved(void); bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, unsigned long *intersection_start); bool is_ipl_block_dump(void); @@ -69,12 +73,28 @@ void print_pgm_check_info(void); unsigned long randomize_within_range(unsigned long size, unsigned long align, unsigned long min, unsigned long max); void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit); -void __printf(1, 2) boot_printk(const char *fmt, ...); +int __printf(1, 2) boot_printk(const char *fmt, ...); void print_stacktrace(unsigned long sp); void error(char *m); int get_random(unsigned long limit, unsigned long *value); +void boot_rb_dump(void); + +#ifndef boot_fmt +#define boot_fmt(fmt) fmt +#endif + +#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__) +#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__) +#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__) +#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__) +#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__) +#define boot_debug(fmt, ...) 
boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__) extern struct machine_info machine; +extern int boot_console_loglevel; +extern bool boot_ignore_loglevel; /* Symbols defined by linker scripts */ extern const char kernel_version[]; diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c index f478e8e9cbda..03500b9d9fb9 100644 --- a/arch/s390/boot/decompressor.c +++ b/arch/s390/boot/decompressor.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/string.h> +#include <asm/boot_data.h> #include <asm/page.h> #include "decompressor.h" #include "boot.h" @@ -63,6 +64,15 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; #include "../../../../lib/decompress_unzstd.c" #endif +static void decompress_error(char *m) +{ + if (bootdebug) + boot_rb_dump(); + boot_emerg("Decompression error: %s\n", m); + boot_emerg(" -- System halted\n"); + disabled_wait(); +} + unsigned long mem_safe_offset(void) { return ALIGN(free_mem_end_ptr, PAGE_SIZE); @@ -71,5 +81,5 @@ unsigned long mem_safe_offset(void) void deploy_kernel(void *output) { __decompress(_compressed_start, _compressed_end - _compressed_start, - NULL, NULL, output, vmlinux.image_size, NULL, error); + NULL, NULL, output, vmlinux.image_size, NULL, decompress_error); } diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 557462e62cd7..d3731f2983b7 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -215,7 +215,7 @@ static void check_cleared_facilities(void) for (i = 0; i < ARRAY_SIZE(als); i++) { if ((stfle_fac_list[i] & als[i]) != als[i]) { - boot_printk("Warning: The Linux kernel requires facilities cleared via command line option\n"); + boot_emerg("The Linux kernel requires facilities cleared via command line option\n"); print_missing_facilities(); break; } @@ -313,5 +313,23 @@ void parse_boot_command_line(void) #endif if (!strcmp(param, "relocate_lowcore") && test_facility(193)) relocate_lowcore = 1; + if (!strcmp(param, "earlyprintk")) + boot_earlyprintk = true; + if (!strcmp(param, "debug")) + boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG; + if (!strcmp(param, "bootdebug")) { + bootdebug = true; + if (val) + strncpy(bootdebug_filter, val, sizeof(bootdebug_filter) - 1); + } + if (!strcmp(param, "quiet")) + boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET; + if (!strcmp(param, "ignore_loglevel")) + boot_ignore_loglevel = true; + if (!strcmp(param, "loglevel")) { + boot_console_loglevel = simple_strtoull(val, NULL, 10); + if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN) + boot_console_loglevel = CONSOLE_LOGLEVEL_MIN; + } } } diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index d00898852a88..f73cd757a5f7 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -30,7 +30,6 @@ static unsigned long get_cert_comp_list_size(void) { struct ipl_rb_certificate_entry *cert; struct ipl_rb_component_entry *comp; - size_t size; /* * Find the length for the IPL report boot data @@ -155,7 +154,7 @@ void save_ipl_cert_comp_list(void) return; size = get_cert_comp_list_size(); - early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int)); + early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int)); ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size; copy_components_bootdata(); diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index f864d2bff775..941f4c9e27cc 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -32,7 +32,7 
@@ struct prng_parm { static int check_prng(void) { if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) { - boot_printk("KASLR disabled: CPU has no PRNG\n"); + boot_warn("KASLR disabled: CPU has no PRNG\n"); return 0; } if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) @@ -168,7 +168,7 @@ static unsigned long iterate_valid_positions(unsigned long size, unsigned long a * cannot have chains. * * On the other hand, "dynamic" or "repetitive" allocations are done via - * physmem_alloc_top_down(). These allocations are tightly packed together + * physmem_alloc_or_die(). These allocations are tightly packed together * top down from the end of online memory. physmem_alloc_pos represents * current position where those allocations start. * diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 5abe59fb3bc0..633f11600aab 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -17,13 +17,14 @@ void print_stacktrace(unsigned long sp) (unsigned long)_stack_end }; bool first = true; - boot_printk("Call Trace:\n"); + boot_emerg("Call Trace:\n"); while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { struct stack_frame *sf = (struct stack_frame *)sp; - boot_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" : - " sp:%016lx [<%016lx>] %pS\n", - sp, sf->gprs[8], (void *)sf->gprs[8]); + if (first) + boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]); + else + boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]); if (sf->back_chain <= sp) break; sp = sf->back_chain; @@ -36,30 +37,30 @@ void print_pgm_check_info(void) unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area; struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area); - boot_printk("Linux version %s\n", kernel_version); + if (bootdebug) + boot_rb_dump(); + boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) - boot_printk("Kernel command line: %s\n", early_command_line); - boot_printk("Kernel fault: interruption code %04x ilc:%x\n", - get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1); + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Kernel fault: interruption code %04x ilc:%d\n", + get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1); if (kaslr_enabled()) { - boot_printk("Kernel random base: %lx\n", __kaslr_offset); - boot_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys); + boot_emerg("Kernel random base: %lx\n", __kaslr_offset); + boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys); } - boot_printk("PSW : %016lx %016lx (%pS)\n", - get_lowcore()->psw_save_area.mask, - get_lowcore()->psw_save_area.addr, - (void *)get_lowcore()->psw_save_area.addr); - boot_printk( - " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", - psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, - psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, - psw->eaba); - boot_printk("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); + boot_emerg("PSW : %016lx %016lx (%pS)\n", + get_lowcore()->psw_save_area.mask, + get_lowcore()->psw_save_area.addr, + (void 
*)get_lowcore()->psw_save_area.addr); + boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", + psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, + psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba); + boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); print_stacktrace(get_lowcore()->gpregs_save_area[15]); - boot_printk("Last Breaking-Event-Address:\n"); - boot_printk(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break, - (void *)get_lowcore()->pgm_last_break); + boot_emerg("Last Breaking-Event-Address:\n"); + boot_emerg(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break, + (void *)get_lowcore()->pgm_last_break); } diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index 7617aa2d2f7e..aa096ef68e8c 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "physmem: " fmt #include <linux/processor.h> #include <linux/errno.h> #include <linux/init.h> @@ -28,7 +29,7 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n) return &physmem_info.online[n]; if (unlikely(!physmem_info.online_extended)) { physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range( - RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0, + RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0, physmem_alloc_pos, true); } return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; @@ -207,11 +208,16 @@ unsigned long detect_max_physmem_end(void) max_physmem_end = search_mem_end(); physmem_info.info_source = MEM_DETECT_BIN_SEARCH; } + boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end, + get_physmem_info_source()); return max_physmem_end; } void detect_physmem_online_ranges(unsigned long max_physmem_end) { + unsigned long start, end; + int i; + if (!sclp_early_read_storage_info()) { physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; } else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) { @@ -226,12 +232,16 @@ void detect_physmem_online_ranges(unsigned long max_physmem_end) } else if (max_physmem_end) { add_physmem_online_range(0, max_physmem_end); } + boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source()); + for_each_physmem_online_range(i, &start, &end) + boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end); } void physmem_set_usable_limit(unsigned long limit) { physmem_info.usable = limit; physmem_alloc_pos = limit; + boot_debug("Usable memory limit: 0x%016lx\n", limit); } static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max) @@ -241,38 +251,47 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min, enum reserved_range_type t; int i; - boot_printk("Linux version %s\n", kernel_version); + boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) - boot_printk("Kernel command line: %s\n", early_command_line); - boot_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", - size, align, min, max); - boot_printk("Reserved memory 
ranges:\n"); + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n", + size, align, min, max); + boot_emerg("Reserved memory ranges:\n"); for_each_physmem_reserved_range(t, range, &start, &end) { - boot_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); + boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); total_reserved_mem += end - start; } - boot_printk("Usable online memory ranges (info source: %s [%x]):\n", - get_physmem_info_source(), physmem_info.info_source); + boot_emerg("Usable online memory ranges (info source: %s [%d]):\n", + get_physmem_info_source(), physmem_info.info_source); for_each_physmem_usable_range(i, &start, &end) { - boot_printk("%016lx %016lx\n", start, end); + boot_emerg("%016lx %016lx\n", start, end); total_mem += end - start; } - boot_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n", - total_mem, total_reserved_mem, - total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); + boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n", + total_mem, total_reserved_mem, + total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); print_stacktrace(current_frame_address()); - boot_printk("\n\n -- System halted\n"); + boot_emerg(" -- System halted\n"); disabled_wait(); } -void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) { physmem_info.reserved[type].start = addr; physmem_info.reserved[type].end = addr + size; } +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size, + get_rr_type_name(type)); +} + void physmem_free(enum reserved_range_type type) { + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start, + physmem_info.reserved[type].end, get_rr_type_name(type)); physmem_info.reserved[type].start = 0; physmem_info.reserved[type].end = 0; } @@ -339,41 +358,73 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s max = min(max, physmem_alloc_pos); addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom); if (addr) - physmem_reserve(type, addr, size); + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size, + get_rr_type_name(type)); return addr; } -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align) +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom) { struct reserved_range *range = &physmem_info.reserved[type]; - struct reserved_range *new_range; + struct reserved_range *new_range = NULL; unsigned int ranges_left; unsigned long addr; addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges, - &ranges_left, true); + &ranges_left, die_on_oom); + if (!addr) + return 0; /* if not a consecutive allocation of the same type or first allocation */ if (range->start != addr + size) { if (range->end) { - physmem_alloc_pos = __physmem_alloc_range( - sizeof(struct reserved_range), 0, 0, physmem_alloc_pos, - physmem_alloc_ranges, &ranges_left, true); - new_range = (struct reserved_range *)physmem_alloc_pos; + addr = 
__physmem_alloc_range(sizeof(struct reserved_range), 0, 0, + physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, true); + new_range = (struct reserved_range *)addr; + addr = __physmem_alloc_range(size, align, 0, addr, ranges_left, + &ranges_left, die_on_oom); + if (!addr) + return 0; *new_range = *range; range->chain = new_range; - addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, - ranges_left, &ranges_left, true); } range->end = addr + size; } + if (type != RR_VMEM) { + boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:", + addr, addr + size, get_rr_type_name(type), align, !!new_range); + } range->start = addr; physmem_alloc_pos = addr; physmem_alloc_ranges = ranges_left; return addr; } +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align) +{ + return physmem_alloc(type, size, align, true); +} + unsigned long get_physmem_alloc_pos(void) { return physmem_alloc_pos; } + +void dump_physmem_reserved(void) +{ + struct reserved_range *range; + enum reserved_range_type t; + unsigned long start, end; + + boot_debug("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + if (end) { + boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n", + get_rr_type_name(t), start, end, (unsigned long)range, + (unsigned long)range->chain); + } + } +} diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 35f18f2b936e..b4c66fa667d5 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -5,21 +5,111 @@ #include <linux/ctype.h> #include <asm/stacktrace.h> #include <asm/boot_data.h> +#include <asm/sections.h> #include <asm/lowcore.h> #include <asm/setup.h> #include <asm/sclp.h> #include <asm/uv.h> #include "boot.h" +int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT; +bool boot_ignore_loglevel; +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + +static void boot_rb_add(const char *str, size_t len) +{ + /* leave double '\0' in the end */ + size_t avail = sizeof(boot_rb) - boot_rb_off - 1; + + /* store strings separated by '\0' */ + if (len + 1 > avail) + boot_rb_off = 0; + strcpy(boot_rb + boot_rb_off, str); + boot_rb_off += len + 1; +} + +static void print_rb_entry(const char *str) +{ + sclp_early_printk(printk_skip_level(str)); +} + +static bool debug_messages_printed(void) +{ + return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG); +} + +void boot_rb_dump(void) +{ + if (debug_messages_printed()) + return; + sclp_early_printk("Boot messages ring buffer:\n"); + boot_rb_foreach(print_rb_entry); +} + const char hex_asc[] = "0123456789abcdef"; static char *as_hex(char *dst, unsigned long val, int pad) { - char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); + char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); - for (*p-- = 0; p >= dst; val >>= 4) + for (*p-- = '\0'; p >= dst; val >>= 4) *p-- = hex_asc[val & 0x0f]; - return end; + return dst; +} + +#define MAX_NUMLEN 21 +static char *as_dec(char *buf, unsigned long val, bool is_signed) +{ + bool negative = false; + char *p = buf + MAX_NUMLEN; + + if (is_signed && (long)val < 0) { + val = (val == LONG_MIN ? 
LONG_MIN : -(long)val); + negative = true; + } + + *--p = '\0'; + do { + *--p = '0' + (val % 10); + val /= 10; + } while (val); + + if (negative) + *--p = '-'; + return p; +} + +static ssize_t strpad(char *dst, size_t dst_size, const char *src, + int _pad, bool zero_pad, bool decimal) +{ + ssize_t len = strlen(src), pad = _pad; + char *p = dst; + + if (max(len, abs(pad)) >= dst_size) + return -E2BIG; + + if (pad > len) { + if (decimal && zero_pad && *src == '-') { + *p++ = '-'; + src++; + len--; + pad--; + } + memset(p, zero_pad ? '0' : ' ', pad - len); + p += pad - len; + } + memcpy(p, src, len); + p += len; + if (pad < 0 && -pad > len) { + memset(p, ' ', -pad - len); + p += -pad - len; + } + *p = '\0'; + return p - dst; } static char *symstart(char *p) @@ -58,35 +148,94 @@ static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned sh return NULL; } -static noinline char *strsym(void *ip) +#define MAX_SYMLEN 64 +static noinline char *strsym(char *buf, void *ip) { - static char buf[64]; unsigned short off; unsigned short len; char *p; p = findsym((unsigned long)ip, &off, &len); if (p) { - strncpy(buf, p, sizeof(buf)); + strncpy(buf, p, MAX_SYMLEN); /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ - p = buf + strnlen(buf, sizeof(buf) - 15); + p = buf + strnlen(buf, MAX_SYMLEN - 15); strcpy(p, "+0x"); - p = as_hex(p + 3, off, 0); - strcpy(p, "/0x"); - as_hex(p + 3, len, 0); + as_hex(p + 3, off, 0); + strcat(p, "/0x"); + as_hex(p + strlen(p), len, 0); } else { as_hex(buf, (unsigned long)ip, 16); } return buf; } -void boot_printk(const char *fmt, ...) +static inline int printk_loglevel(const char *buf) +{ + if (buf[0] == KERN_SOH_ASCII && buf[1]) { + switch (buf[1]) { + case '0' ... '7': + return buf[1] - '0'; + } + } + return MESSAGE_LOGLEVEL_DEFAULT; +} + +static void boot_console_earlyprintk(const char *buf) +{ + int level = printk_loglevel(buf); + + /* always print emergency messages */ + if (level > LOGLEVEL_EMERG && !boot_earlyprintk) + return; + buf = printk_skip_level(buf); + /* print debug messages only when bootdebug is enabled */ + if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf)))) + return; + if (boot_ignore_loglevel || level < boot_console_loglevel) + sclp_early_printk(buf); +} + +static char *add_timestamp(char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + union tod_clock *boot_clock = (union tod_clock *)&get_lowcore()->boot_clock; + unsigned long ns = tod_to_ns(get_tod_clock() - boot_clock->tod); + char ts[MAX_NUMLEN]; + + *buf++ = '['; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0); + *buf++ = '.'; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0); + *buf++ = ']'; + *buf++ = ' '; +#endif + return buf; +} + +#define va_arg_len_type(args, lenmod, typemod) \ + ((lenmod == 'l') ? va_arg(args, typemod long) : \ + (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \ + (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \ + (lenmod == 'z') ? va_arg(args, typemod long) : \ + va_arg(args, typemod int)) + +int boot_printk(const char *fmt, ...) 
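/*
 * Summary (not part of the patch): boot_printk() now prepends a printk-style
 * loglevel byte and, with CONFIG_PRINTK_TIME, a timestamp; it understands
 * %s, %pS, %d/%i, %u and %x with optional field width and h/hh/l/z length
 * modifiers, stores each formatted line in the boot message ring buffer,
 * hands it to the loglevel-filtered early SCLP console, and returns the
 * message length.
 */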
{ char buf[1024] = { 0 }; char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ - unsigned long pad; - char *p = buf; + bool zero_pad, decimal; + char *strval, *p = buf; + char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)]; va_list args; + char lenmod; + ssize_t len; + int pad; + + *p++ = KERN_SOH_ASCII; + *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT; + p = add_timestamp(p); + fmt = printk_skip_level(fmt); va_start(args, fmt); for (; p < end && *fmt; fmt++) { @@ -94,31 +243,56 @@ void boot_printk(const char *fmt, ...) *p++ = *fmt; continue; } - pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; + if (*++fmt == '%') { + *p++ = '%'; + continue; + } + zero_pad = (*fmt == '0'); + pad = simple_strtol(fmt, (char **)&fmt, 10); + lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0; + if (lenmod == 'h' && *fmt == 'h') { + lenmod = 'H'; + fmt++; + } + decimal = false; switch (*fmt) { case 's': - p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); + if (lenmod) + goto out; + strval = va_arg(args, char *); + zero_pad = false; break; case 'p': - if (*++fmt != 'S') + if (*++fmt != 'S' || lenmod) goto out; - p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf)); + strval = strsym(valbuf, va_arg(args, void *)); + zero_pad = false; break; - case 'l': - if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad)) - goto out; - p = as_hex(p, va_arg(args, unsigned long), pad); + case 'd': + case 'i': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1); + decimal = true; + break; + case 'u': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); break; case 'x': - if (end - p <= max(sizeof(int) * 2, pad)) - goto out; - p = as_hex(p, va_arg(args, unsigned int), pad); + strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); break; default: goto out; } + len = strpad(p, end - p, strval, pad, zero_pad, decimal); + if (len == -E2BIG) + break; + p += len; } out: va_end(args); - sclp_early_printk(buf); + len = strlen(buf); + if (len) { + boot_rb_add(buf, len); + boot_console_earlyprintk(buf); + } + return len; } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index e6b06692ddc8..885bd1dd2c82 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "startup: " fmt #include <linux/string.h> #include <linux/elf.h> #include <asm/page-states.h> @@ -42,7 +43,8 @@ struct machine_info machine; void error(char *x) { - boot_printk("\n\n%s\n\n -- System halted", x); + boot_emerg("%s\n", x); + boot_emerg(" -- System halted\n"); disabled_wait(); } @@ -143,7 +145,7 @@ static void rescue_initrd(unsigned long min, unsigned long max) return; old_addr = addr; physmem_free(RR_INITRD); - addr = physmem_alloc_top_down(RR_INITRD, size, 0); + addr = physmem_alloc_or_die(RR_INITRD, size, 0); memmove((void *)addr, (void *)old_addr, size); } @@ -222,12 +224,16 @@ static void setup_ident_map_size(unsigned long max_physmem_end) if (oldmem_data.start) { __kaslr_enabled = 0; ident_map_size = min(ident_map_size, oldmem_data.size); + boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size); } else if (ipl_block_valid && is_ipl_block_dump()) { __kaslr_enabled = 0; - if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) + if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) { ident_map_size = min(ident_map_size, hsa_size); + boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size); + } } #endif + boot_debug("Identity map 
size: 0x%016lx\n", ident_map_size); } #define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)) @@ -267,6 +273,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE)); BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE); vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE); + boot_debug("vmem size estimated: 0x%016lx\n", vsize); if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE || (vsize > _REGION2_SIZE && kaslr_enabled())) { asce_limit = _REGION1_SIZE; @@ -290,8 +297,10 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) * otherwise asce_limit and rte_size would have been adjusted. */ vmax = adjust_to_uv_max(asce_limit); + boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax); #ifdef CONFIG_KASAN BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START); + boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END); /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif @@ -305,19 +314,27 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) pos = 0; kernel_end = vmax - pos * THREAD_SIZE; kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE); + boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax); + boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start, + kernel_size + kernel_size); } else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) { kernel_start = round_down(vmax - kernel_size, THREAD_SIZE); - boot_printk("The kernel base address is forced to %lx\n", kernel_start); + boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start, + kernel_start + kernel_size); } else { kernel_start = __NO_KASLR_START_KERNEL; + boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start, + kernel_start + kernel_size); } __kaslr_offset = kernel_start; + boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset); MODULES_END = round_down(kernel_start, _SEGMENT_SIZE); MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; if (IS_ENABLED(CONFIG_KMSAN)) VMALLOC_END -= MODULES_LEN * 2; + boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END); /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */ vsize = (VMALLOC_END - FIXMAP_SIZE) / 2; @@ -329,10 +346,15 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) VMALLOC_END -= vmalloc_size * 2; } VMALLOC_START = VMALLOC_END - vmalloc_size; + boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END); __memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE); + boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area, + __memcpy_real_area + MEMCPY_REAL_SIZE); __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)); + boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore, + __abs_lowcore + ABS_LOWCORE_MAP_SIZE); /* split remaining virtual space between 1:1 mapping & vmemmap array */ pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page)); @@ -352,8 +374,11 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS)); max_mappable = max(ident_map_size, MAX_DCSS_ADDR); max_mappable = min(max_mappable, 
vmemmap_start); - if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE)) - __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE + __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#endif + boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base, + __identity_base + ident_map_size); return asce_limit; } @@ -412,6 +437,10 @@ void startup_kernel(void) psw_t psw; setup_lpp(); + store_ipl_parmblock(); + uv_query_info(); + setup_boot_command_line(); + parse_boot_command_line(); /* * Non-randomized kernel physical start address must be _SEGMENT_SIZE @@ -431,12 +460,8 @@ void startup_kernel(void) oldmem_data.start = parmarea.oldmem_base; oldmem_data.size = parmarea.oldmem_size; - store_ipl_parmblock(); read_ipl_report(); - uv_query_info(); sclp_early_read_info(); - setup_boot_command_line(); - parse_boot_command_line(); detect_facilities(); cmma_init(); sanitize_prot_virt_host(); @@ -526,6 +551,7 @@ void startup_kernel(void) __kaslr_offset, __kaslr_offset_phys); kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); + dump_physmem_reserved(); copy_bootdata(); __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, (struct alt_instr *)_vmlinux_info.alt_instructions_end, @@ -542,5 +568,6 @@ void startup_kernel(void) */ psw.addr = __kaslr_offset + vmlinux.entry; psw.mask = PSW_KERNEL_BITS; + boot_debug("Starting kernel at: 0x%016lx\n", psw.addr); __load_psw(psw); } diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 881a1ece422f..cfca94a8eac4 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "vmem: " fmt #include <linux/sched/task.h> #include <linux/pgtable.h> #include <linux/kasan.h> @@ -13,6 +14,7 @@ #include "decompressor.h" #include "boot.h" +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) struct ctlreg __bootdata_preserved(s390_invalid_asce); #ifdef CONFIG_PROC_FS @@ -31,12 +33,42 @@ enum populate_mode { POPULATE_IDENTITY, POPULATE_KERNEL, #ifdef CONFIG_KASAN + /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */ POPULATE_KASAN_MAP_SHADOW, POPULATE_KASAN_ZERO_SHADOW, POPULATE_KASAN_SHALLOW #endif }; +#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t +static inline const char *get_populate_mode_name(enum populate_mode t) +{ + switch (t) { + POPULATE_MODE_NAME(NONE); + POPULATE_MODE_NAME(DIRECT); + POPULATE_MODE_NAME(LOWCORE); + POPULATE_MODE_NAME(ABS_LOWCORE); + POPULATE_MODE_NAME(IDENTITY); + POPULATE_MODE_NAME(KERNEL); +#ifdef CONFIG_KASAN + POPULATE_MODE_NAME(KASAN_MAP_SHADOW); + POPULATE_MODE_NAME(KASAN_ZERO_SHADOW); + POPULATE_MODE_NAME(KASAN_SHALLOW); +#endif + default: + return "UNKNOWN"; + } +} + +static bool is_kasan_populate_mode(enum populate_mode mode) +{ +#ifdef CONFIG_KASAN + return mode >= POPULATE_KASAN_MAP_SHADOW; +#else + return false; +#endif +} + static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); #ifdef CONFIG_KASAN @@ -52,9 +84,12 @@ static pte_t pte_z; static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) { - start = PAGE_ALIGN_DOWN(__sha(start)); - end = PAGE_ALIGN(__sha(end)); - pgtable_populate(start, end, mode); + unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start)); + unsigned long sha_end = PAGE_ALIGN(__sha(end)); + + boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", 
get_populate_mode_name(mode), + start, end, sha_start, sha_end); + pgtable_populate(sha_start, sha_end, mode); } static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end) @@ -200,7 +235,7 @@ static void *boot_crst_alloc(unsigned long val) unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; unsigned long *table; - table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); + table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size); crst_table_init(table, val); __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; @@ -216,7 +251,7 @@ static pte_t *boot_pte_alloc(void) * during POPULATE_KASAN_MAP_SHADOW when EDAT is off */ if (!pte_leftover) { - pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE); pte = pte_leftover + _PAGE_TABLE_SIZE; __arch_set_page_dat(pte, 1); } else { @@ -228,11 +263,12 @@ static pte_t *boot_pte_alloc(void) return pte; } -static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) +static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size, + enum populate_mode mode) { switch (mode) { case POPULATE_NONE: - return -1; + return INVALID_PHYS_ADDR; case POPULATE_DIRECT: return addr; case POPULATE_LOWCORE: @@ -245,38 +281,64 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m return __identity_pa(addr); #ifdef CONFIG_KASAN case POPULATE_KASAN_MAP_SHADOW: - addr = physmem_alloc_top_down(RR_VMEM, size, size); - memset((void *)addr, 0, size); - return addr; + /* Allow to fail large page allocations, this will fall back to 1mb/4k pages */ + addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE); + if (addr) { + memset((void *)addr, 0, size); + return addr; + } + return INVALID_PHYS_ADDR; #endif default: - return -1; + return INVALID_PHYS_ADDR; } } -static bool large_allowed(enum populate_mode mode) +static bool large_page_mapping_allowed(enum populate_mode mode) { - return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL); + switch (mode) { + case POPULATE_DIRECT: + case POPULATE_IDENTITY: + case POPULATE_KERNEL: +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: +#endif + return true; + default: + return false; + } } -static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end, - enum populate_mode mode) +static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - unsigned long size = end - addr; + unsigned long pa, size = end - addr; + + if (!machine.has_edat2 || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PUD_SIZE)) + return INVALID_PHYS_ADDR; - return machine.has_edat2 && large_allowed(mode) && - IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) && - IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE); + return pa; } -static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end, - enum populate_mode mode) +static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - unsigned long size = end - addr; + unsigned long pa, size = end - addr; - return machine.has_edat1 && large_allowed(mode) && - IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) && - IS_ALIGNED(_pa(addr, size, mode), 
PMD_SIZE); + if (!machine.has_edat1 || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PMD_SIZE)) + return INVALID_PHYS_ADDR; + + return pa; } static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, @@ -290,7 +352,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e if (pte_none(*pte)) { if (kasan_pte_populate_zero_shadow(pte, mode)) continue; - entry = __pte(_pa(addr, PAGE_SIZE, mode)); + entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL); set_pte(pte, entry); pages++; @@ -303,7 +365,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pmd_t *pmd, entry; pte_t *pte; @@ -313,8 +375,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e if (pmd_none(*pmd)) { if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) continue; - if (can_large_pmd(pmd, addr, next, mode)) { - entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); + pa = try_get_large_pmd_pa(pmd, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pmd(pa); entry = set_pmd_bit(entry, SEGMENT_KERNEL); set_pmd(pmd, entry); pages++; @@ -334,7 +397,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pud_t *pud, entry; pmd_t *pmd; @@ -344,8 +407,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e if (pud_none(*pud)) { if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) continue; - if (can_large_pud(pud, addr, next, mode)) { - entry = __pud(_pa(addr, _REGION3_SIZE, mode)); + pa = try_get_large_pud_pa(pud, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pud(pa); entry = set_pud_bit(entry, REGION3_KERNEL); set_pud(pud, entry); pages++; @@ -388,6 +452,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat pgd_t *pgd; p4d_t *p4d; + if (!is_kasan_populate_mode(mode)) { + boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n", + get_populate_mode_name(mode), addr, end, + resolve_pa_may_alloc(addr, 0, mode), + resolve_pa_may_alloc(end - 1, 0, mode) + 1); + } + pgd = pgd_offset(&init_mm, addr); for (; addr < end; addr = next, pgd++) { next = pgd_addr_end(addr, end); diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 4a6b0a8b6412..2e829c16fd8a 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -9,11 +9,11 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 -#define EX_TYPE_UA_STORE 3 -#define EX_TYPE_UA_LOAD_MEM 4 +#define EX_TYPE_UA_FAULT 3 #define EX_TYPE_UA_LOAD_REG 5 #define EX_TYPE_UA_LOAD_REGPAIR 6 #define EX_TYPE_ZEROPAD 7 +#define EX_TYPE_FPC 8 #define EX_DATA_REG_ERR_SHIFT 0 #define EX_DATA_REG_ERR GENMASK(3, 0) @@ -69,11 +69,8 @@ #define EX_TABLE_AMODE31(_fault, _target) \ __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0) -#define EX_TABLE_UA_STORE(_fault, _target, _regerr) \ - 
__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0) - -#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \ - __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len) +#define EX_TABLE_UA_FAULT(_fault, _target, _regerr) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_FAULT, _regerr, _regerr, 0) #define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0) @@ -84,4 +81,7 @@ #define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0) +#define EX_TABLE_FPC(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FPC, __stringify(%%r0), __stringify(%%r0), 0) + #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/asm.h b/arch/s390/include/asm/asm.h index ec011b94af2a..e9062b01e2a2 100644 --- a/arch/s390/include/asm/asm.h +++ b/arch/s390/include/asm/asm.h @@ -28,7 +28,7 @@ * [var] also contains the program mask. CC_TRANSFORM() moves the condition * code to the two least significant bits and sets all other bits to zero. */ -#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_GCC_ASM_FLAG_OUTPUT_BROKEN)) +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_ASM_FLAG_OUTPUT_BROKEN)) #define __HAVE_ASM_FLAG_OUTPUTS__ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 15aa64e3020e..d5125296ade2 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -60,7 +60,7 @@ static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsig asm volatile( " tm %[addr],%[mask]\n" : "=@cc" (cc) - : [addr] "R" (*addr), [mask] "I" (mask) + : [addr] "Q" (*addr), [mask] "I" (mask) ); return cc == 3; } diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h index f7eed27b3220..f55f8227058e 100644 --- a/arch/s390/include/asm/boot_data.h +++ b/arch/s390/include/asm/boot_data.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_S390_BOOT_DATA_H +#include <linux/string.h> #include <asm/setup.h> #include <asm/ipl.h> @@ -15,4 +16,54 @@ extern unsigned long ipl_cert_list_size; extern unsigned long early_ipl_comp_list_addr; extern unsigned long early_ipl_comp_list_size; +extern char boot_rb[PAGE_SIZE * 2]; +extern bool boot_earlyprintk; +extern size_t boot_rb_off; +extern char bootdebug_filter[128]; +extern bool bootdebug; + +#define boot_rb_foreach(cb) \ + do { \ + size_t off = boot_rb_off + strlen(boot_rb + boot_rb_off) + 1; \ + size_t len; \ + for (; off < sizeof(boot_rb) && (len = strlen(boot_rb + off)); off += len + 1) \ + cb(boot_rb + off); \ + for (off = 0; off < boot_rb_off && (len = strlen(boot_rb + off)); off += len + 1) \ + cb(boot_rb + off); \ + } while (0) + +/* + * bootdebug_filter is a comma separated list of strings, + * where each string can be a prefix of the message. 
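+ *
+ * Example: "bootdebug=vmem,startup" restricts bootdebug output to messages
+ * starting with "vmem" or "startup", i.e. those printed under the
+ * "vmem: " and "startup: " boot_fmt() prefixes.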
+ */ +static inline bool bootdebug_filter_match(const char *buf) +{ + char *p = bootdebug_filter, *s; + char *end; + + if (!*p) + return true; + + end = p + strlen(p); + while (p < end) { + p = skip_spaces(p); + s = memscan(p, ',', end - p); + if (!strncmp(p, buf, s - p)) + return true; + p = s + 1; + } + return false; +} + +static inline const char *skip_timestamp(const char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + const char *p = memchr(buf, ']', strlen(buf)); + + if (p && p[1] == ' ') + return p + 2; +#endif + return buf; +} + #endif /* _ASM_S390_BOOT_DATA_H */ diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h index de510c9f6efa..f668bffd6dd3 100644 --- a/arch/s390/include/asm/fpu-insn.h +++ b/arch/s390/include/asm/fpu-insn.h @@ -100,19 +100,12 @@ static __always_inline void fpu_lfpc(unsigned int *fpc) */ static inline void fpu_lfpc_safe(unsigned int *fpc) { - u32 tmp; - instrument_read(fpc, sizeof(*fpc)); asm_inline volatile( - "0: lfpc %[fpc]\n" - "1: nopr %%r7\n" - ".pushsection .fixup, \"ax\"\n" - "2: lghi %[tmp],0\n" - " sfpc %[tmp]\n" - " jg 1b\n" - ".popsection\n" - EX_TABLE(1b, 2b) - : [tmp] "=d" (tmp) + " lfpc %[fpc]\n" + "0: nopr %%r7\n" + EX_TABLE_FPC(0b, 0b) + : : [fpc] "Q" (*fpc) : "memory"); } @@ -183,7 +176,19 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3) : "memory"); } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS + +static __always_inline void fpu_vl(u8 v1, const void *vxr) +{ + instrument_read(vxr, sizeof(__vector128)); + asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" + : + : [vxr] "Q" (*(__vector128 *)vxr), + [v1] "I" (v1) + : "memory"); +} + +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vl(u8 v1, const void *vxr) { @@ -197,19 +202,7 @@ static __always_inline void fpu_vl(u8 v1, const void *vxr) : "memory", "1"); } -#else /* CONFIG_CC_IS_CLANG */ - -static __always_inline void fpu_vl(u8 v1, const void *vxr) -{ - instrument_read(vxr, sizeof(__vector128)); - asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" - : - : [vxr] "Q" (*(__vector128 *)vxr), - [v1] "I" (v1) - : "memory"); -} - -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vleib(u8 v, s16 val, u8 index) { @@ -238,7 +231,7 @@ static __always_inline u64 fpu_vlgvf(u8 v, u16 index) return val; } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) { @@ -246,17 +239,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile( - " la 1,%[vxr]\n" - " VLL %[v1],%[index],0,1\n" - : - : [vxr] "R" (*(u8 *)vxr), - [index] "d" (index), - [v1] "I" (v1) - : "memory", "1"); + asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" + : + : [vxr] "Q" (*(u8 *)vxr), + [index] "d" (index), + [v1] "I" (v1) + : "memory"); } -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) { @@ -264,17 +255,19 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" - : - : [vxr] "Q" (*(u8 *)vxr), - [index] "d" (index), - [v1] "I" (v1) - : "memory"); + asm volatile( + " la 1,%[vxr]\n" + " VLL %[v1],%[index],0,1\n" + : + : [vxr] "R" (*(u8 *)vxr), + [index] "d" (index), + [v1] 
"I" (v1) + : "memory", "1"); } -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS #define fpu_vlm(_v1, _v3, _vxrs) \ ({ \ @@ -284,17 +277,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile( \ - " la 1,%[vxrs]\n" \ - " VLM %[v1],%[v3],0,1\n" \ - : \ - : [vxrs] "R" (*_v), \ - [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory", "1"); \ + asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : \ + : [vxrs] "Q" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ (_v3) - (_v1) + 1; \ }) -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define fpu_vlm(_v1, _v3, _vxrs) \ ({ \ @@ -304,15 +295,17 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ - : \ - : [vxrs] "Q" (*_v), \ - [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory"); \ + asm volatile( \ + " la 1,%[vxrs]\n" \ + " VLM %[v1],%[v3],0,1\n" \ + : \ + : [vxrs] "R" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ (_v3) - (_v1) + 1; \ }) -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vlr(u8 v1, u8 v2) { @@ -362,7 +355,18 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3) : "memory"); } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS + +static __always_inline void fpu_vst(u8 v1, const void *vxr) +{ + instrument_write(vxr, sizeof(__vector128)); + asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" + : [vxr] "=Q" (*(__vector128 *)vxr) + : [v1] "I" (v1) + : "memory"); +} + +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vst(u8 v1, const void *vxr) { @@ -375,20 +379,23 @@ static __always_inline void fpu_vst(u8 v1, const void *vxr) : "memory", "1"); } -#else /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -static __always_inline void fpu_vst(u8 v1, const void *vxr) +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS + +static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) { - instrument_write(vxr, sizeof(__vector128)); - asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" - : [vxr] "=Q" (*(__vector128 *)vxr) - : [v1] "I" (v1) + unsigned int size; + + size = min(index + 1, sizeof(__vector128)); + instrument_write(vxr, size); + asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" + : [vxr] "=Q" (*(u8 *)vxr) + : [index] "d" (index), [v1] "I" (v1) : "memory"); } -#endif /* CONFIG_CC_IS_CLANG */ - -#ifdef CONFIG_CC_IS_CLANG +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) { @@ -404,23 +411,9 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) : "memory", "1"); } -#else /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) -{ - unsigned int size; - - size = min(index + 1, sizeof(__vector128)); - instrument_write(vxr, size); - asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" - : [vxr] "=Q" (*(u8 *)vxr) - : [index] "d" (index), [v1] "I" (v1) - : "memory"); -} - -#endif /* CONFIG_CC_IS_CLANG */ - -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS #define fpu_vstm(_v1, _v3, _vxrs) \ ({ \ @@ -430,16 +423,14 @@ static 
__always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile( \ - " la 1,%[vxrs]\n" \ - " VSTM %[v1],%[v3],0,1\n" \ - : [vxrs] "=R" (*_v) \ - : [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory", "1"); \ + asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : [vxrs] "=Q" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ (_v3) - (_v1) + 1; \ }) -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define fpu_vstm(_v1, _v3, _vxrs) \ ({ \ @@ -449,14 +440,16 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ - : [vxrs] "=Q" (*_v) \ - : [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory"); \ + asm volatile( \ + " la 1,%[vxrs]\n" \ + " VSTM %[v1],%[v3],0,1\n" \ + : [vxrs] "=R" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ (_v3) - (_v1) + 1; \ }) -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vupllf(u8 v1, u8 v2) { diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index a3b73a4f626e..185331e91f83 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -51,6 +51,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } +#define ftrace_get_symaddr(fentry_ip) ((unsigned long)(fentry_ip)) #include <linux/ftrace_regs.h> diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 752a2310f0d6..f5781794356b 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -2,80 +2,95 @@ #ifndef _ASM_S390_FUTEX_H #define _ASM_S390_FUTEX_H +#include <linux/instrumented.h> #include <linux/uaccess.h> #include <linux/futex.h> #include <asm/asm-extable.h> #include <asm/mmu_context.h> #include <asm/errno.h> -#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ - asm volatile( \ - " sacf 256\n" \ - "0: l %1,0(%6)\n" \ - "1:"insn \ - "2: cs %1,%2,0(%6)\n" \ - "3: jl 1b\n" \ - " lhi %0,0\n" \ - "4: sacf 768\n" \ - EX_TABLE(0b,4b) EX_TABLE(1b,4b) \ - EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ - : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ - "=m" (*uaddr) \ - : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ - "m" (*uaddr) : "cc"); +#define FUTEX_OP_FUNC(name, insn) \ +static uaccess_kmsan_or_inline int \ +__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \ +{ \ + int rc, new; \ + \ + instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \ + asm_inline volatile( \ + " sacf 256\n" \ + "0: l %[old],%[uaddr]\n" \ + "1:"insn \ + "2: cs %[old],%[new],%[uaddr]\n" \ + "3: jl 1b\n" \ + " lhi %[rc],0\n" \ + "4: sacf 768\n" \ + EX_TABLE_UA_FAULT(0b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(2b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(3b, 4b, %[rc]) \ + : [rc] "=d" (rc), [old] "=&d" (*old), \ + [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \ + : [oparg] "d" (oparg) \ + : "cc"); \ + if (!rc) \ + instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \ + return rc; \ +} + +FUTEX_OP_FUNC(set, "lr %[new],%[oparg]\n") +FUTEX_OP_FUNC(add, "lr %[new],%[old]\n ar %[new],%[oparg]\n") +FUTEX_OP_FUNC(or, "lr %[new],%[old]\n or %[new],%[oparg]\n") +FUTEX_OP_FUNC(and, "lr %[new],%[old]\n nr %[new],%[oparg]\n") +FUTEX_OP_FUNC(xor, "lr %[new],%[old]\n xr %[new],%[oparg]\n") -static inline int arch_futex_atomic_op_inuser(int op, int oparg, int 
*oval, - u32 __user *uaddr) +static inline +int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { - int oldval = 0, newval, ret; + int old, rc; switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("lr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_set(oparg, &old, uaddr); break; case FUTEX_OP_ADD: - __futex_atomic_op("lr %2,%1\nar %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_add(oparg, &old, uaddr); break; case FUTEX_OP_OR: - __futex_atomic_op("lr %2,%1\nor %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_or(oparg, &old, uaddr); break; case FUTEX_OP_ANDN: - __futex_atomic_op("lr %2,%1\nnr %2,%5\n", - ret, oldval, newval, uaddr, ~oparg); + rc = __futex_atomic_and(~oparg, &old, uaddr); break; case FUTEX_OP_XOR: - __futex_atomic_op("lr %2,%1\nxr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_xor(oparg, &old, uaddr); break; default: - ret = -ENOSYS; + rc = -ENOSYS; } - - if (!ret) - *oval = oldval; - - return ret; + if (!rc) + *oval = old; + return rc; } -static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) +static uaccess_kmsan_or_inline +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { - int ret; + int rc; - asm volatile( - " sacf 256\n" - "0: cs %1,%4,0(%5)\n" - "1: la %0,0\n" - "2: sacf 768\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) - : "=d" (ret), "+d" (oldval), "=m" (*uaddr) - : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + instrument_copy_from_user_before(uval, uaddr, sizeof(*uval)); + asm_inline volatile( + " sacf 256\n" + "0: cs %[old],%[new],%[uaddr]\n" + "1: lhi %[rc],0\n" + "2: sacf 768\n" + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) + : [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr) + : [new] "d" (newval) : "cc", "memory"); *uval = oldval; - return ret; + instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0); + return rc; } #endif /* _ASM_S390_FUTEX_H */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 4f43cdd9835b..1ff145f7b52b 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -184,7 +184,11 @@ extern struct vm_layout vm_layout; #define __kaslr_offset vm_layout.kaslr_offset #define __kaslr_offset_phys vm_layout.kaslr_offset_phys +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE #define __identity_base vm_layout.identity_base +#else +#define __identity_base 0UL +#endif #define ident_map_size vm_layout.identity_size static inline unsigned long kaslr_offset(void) diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index 51b68a43e195..7ef3bbec98b0 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -26,7 +26,7 @@ enum reserved_range_type { RR_AMODE31, RR_IPLREPORT, RR_CERT_COMP_LIST, - RR_MEM_DETECT_EXTENDED, + RR_MEM_DETECT_EXT, RR_VMEM, RR_MAX }; @@ -128,7 +128,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t) RR_TYPE_NAME(AMODE31); RR_TYPE_NAME(IPLREPORT); RR_TYPE_NAME(CERT_COMP_LIST); - RR_TYPE_NAME(MEM_DETECT_EXTENDED); + RR_TYPE_NAME(MEM_DETECT_EXT); RR_TYPE_NAME(VMEM); default: return "UNKNOWN"; diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 4da3b2956285..18f37dff03c9 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -172,6 +172,7 @@ void sclp_early_printk(const char *s); void __sclp_early_printk(const 
char *s, unsigned int len); void sclp_emergency_printk(const char *s); +int sclp_init(void); int sclp_early_get_memsize(unsigned long *mem); int sclp_early_get_hsa_size(unsigned long *hsa_size); int _sclp_get_core_info(struct sclp_core_info *info); diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index a81f897a81ce..f5920163ee97 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -22,16 +22,117 @@ void debug_user_asce(int exit); -unsigned long __must_check -raw_copy_from_user(void *to, const void __user *from, unsigned long n); +union oac { + unsigned int val; + struct { + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac1; + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac2; + }; +}; -unsigned long __must_check -raw_copy_to_user(void __user *to, const void *from, unsigned long n); +static __always_inline __must_check unsigned long +raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key) +{ + unsigned long rem; + union oac spec = { + .oac2.key = key, + .oac2.as = PSW_BITS_AS_SECONDARY, + .oac2.k = 1, + .oac2.a = 1, + }; -#ifndef CONFIG_KASAN -#define INLINE_COPY_FROM_USER -#define INLINE_COPY_TO_USER -#endif + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos 0(%[to]),0(%[from]),%[size]\n" + "1: jz 5f\n" + " algr %[size],%[val]\n" + " slgr %[from],%[val]\n" + " slgr %[to],%[val]\n" + " j 0b\n" + "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */ + " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */ + " slgr %[rem],%[from]\n" + " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ + " jnh 6f\n" + "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" + "4: slgr %[size],%[rem]\n" + " j 6f\n" + "5: lghi %[size],0\n" + "6:\n" + EX_TABLE(0b, 2b) + EX_TABLE(1b, 2b) + EX_TABLE(3b, 6b) + EX_TABLE(4b, 6b) + : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem) + : [val] "a" (-4096UL), [spec] "d" (spec.val) + : "cc", "memory", "0"); + return size; +} + +static __always_inline __must_check unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return raw_copy_from_user_key(to, from, n, 0); +} + +static __always_inline __must_check unsigned long +raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key) +{ + unsigned long rem; + union oac spec = { + .oac1.key = key, + .oac1.as = PSW_BITS_AS_SECONDARY, + .oac1.k = 1, + .oac1.a = 1, + }; + + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos 0(%[to]),0(%[from]),%[size]\n" + "1: jz 5f\n" + " algr %[size],%[val]\n" + " slgr %[to],%[val]\n" + " slgr %[from],%[val]\n" + " j 0b\n" + "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ + " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ + " slgr %[rem],%[to]\n" + " clgr %[size],%[rem]\n" /* copy crosses next page boundary? 
*/ + " jnh 6f\n" + "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" + "4: slgr %[size],%[rem]\n" + " j 6f\n" + "5: lghi %[size],0\n" + "6:\n" + EX_TABLE(0b, 2b) + EX_TABLE(1b, 2b) + EX_TABLE(3b, 6b) + EX_TABLE(4b, 6b) + : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem) + : [val] "a" (-4096UL), [spec] "d" (spec.val) + : "cc", "memory", "0"); + return size; +} + +static __always_inline __must_check unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return raw_copy_to_user_key(to, from, n, 0); +} unsigned long __must_check _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key); @@ -55,63 +156,71 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned lo return n; } -union oac { - unsigned int val; - struct { - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac1; - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac2; - }; -}; - int __noreturn __put_user_bad(void); #ifdef CONFIG_KMSAN -#define get_put_user_noinstr_attributes \ - noinline __maybe_unused __no_sanitize_memory +#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory #else -#define get_put_user_noinstr_attributes __always_inline +#define uaccess_kmsan_or_inline __always_inline #endif -#define DEFINE_PUT_USER(type) \ -static get_put_user_noinstr_attributes int \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define DEFINE_PUT_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ +__put_user_##type##_noinstr(unsigned type __user *to, \ + unsigned type *from, \ + unsigned long size) \ +{ \ + asm goto( \ + " llilh %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[Efault]) \ + EX_TABLE(1b, %l[Efault]) \ + : [to] "+Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ + : "cc", "0" \ + : Efault \ + ); \ + return 0; \ +Efault: \ + return -EFAULT; \ +} + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +#define DEFINE_PUT_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ __put_user_##type##_noinstr(unsigned type __user *to, \ unsigned type *from, \ unsigned long size) \ { \ - union oac __oac_spec = { \ - .oac1.as = PSW_BITS_AS_SECONDARY, \ - .oac1.a = 1, \ - }; \ int rc; \ \ asm volatile( \ - " lr 0,%[spec]\n" \ - "0: mvcos %[_to],%[_from],%[_size]\n" \ - "1: xr %[rc],%[rc]\n" \ + " llilh %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: lhi %[rc],0\n" \ "2:\n" \ - EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ - EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ - : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \ - : [_size] "d" (size), [_from] "Q" (*(from)), \ - [spec] "d" (__oac_spec.val) \ + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ + : [rc] "=d" (rc), [to] "+Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ : "cc", "0"); \ return rc; \ -} \ - \ +} + +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +DEFINE_PUT_USER_NOINSTR(char); +DEFINE_PUT_USER_NOINSTR(short); +DEFINE_PUT_USER_NOINSTR(int); +DEFINE_PUT_USER_NOINSTR(long); + +#define DEFINE_PUT_USER(type) \ static __always_inline int \ __put_user_##type(unsigned type __user *to, unsigned type *from, \ unsigned long size) \ @@ -128,69 +237,111 @@ DEFINE_PUT_USER(short); DEFINE_PUT_USER(int); DEFINE_PUT_USER(long); -static 
__always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) -{ - int rc; +#define __put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __x = (x); \ + int __prc; \ + \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __prc = __put_user_char((unsigned char __user *)(ptr), \ + (unsigned char *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 2: \ + __prc = __put_user_short((unsigned short __user *)(ptr),\ + (unsigned short *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 4: \ + __prc = __put_user_int((unsigned int __user *)(ptr), \ + (unsigned int *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 8: \ + __prc = __put_user_long((unsigned long __user *)(ptr), \ + (unsigned long *)&__x, \ + sizeof(*(ptr))); \ + break; \ + default: \ + __prc = __put_user_bad(); \ + break; \ + } \ + __builtin_expect(__prc, 0); \ +}) - switch (size) { - case 1: - rc = __put_user_char((unsigned char __user *)ptr, - (unsigned char *)x, - size); - break; - case 2: - rc = __put_user_short((unsigned short __user *)ptr, - (unsigned short *)x, - size); - break; - case 4: - rc = __put_user_int((unsigned int __user *)ptr, - (unsigned int *)x, - size); - break; - case 8: - rc = __put_user_long((unsigned long __user *)ptr, - (unsigned long *)x, - size); - break; - default: - __put_user_bad(); - break; - } - return rc; -} +#define put_user(x, ptr) \ +({ \ + might_fault(); \ + __put_user(x, ptr); \ +}) int __noreturn __get_user_bad(void); -#define DEFINE_GET_USER(type) \ -static get_put_user_noinstr_attributes int \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define DEFINE_GET_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ __get_user_##type##_noinstr(unsigned type *to, \ - unsigned type __user *from, \ + const unsigned type __user *from, \ + unsigned long size) \ +{ \ + asm goto( \ + " lhi %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[Efault]) \ + EX_TABLE(1b, %l[Efault]) \ + : [to] "=Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ + : "cc", "0" \ + : Efault \ + ); \ + return 0; \ +Efault: \ + *to = 0; \ + return -EFAULT; \ +} + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +#define DEFINE_GET_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ +__get_user_##type##_noinstr(unsigned type *to, \ + const unsigned type __user *from, \ unsigned long size) \ { \ - union oac __oac_spec = { \ - .oac2.as = PSW_BITS_AS_SECONDARY, \ - .oac2.a = 1, \ - }; \ int rc; \ \ asm volatile( \ - " lr 0,%[spec]\n" \ - "0: mvcos 0(%[_to]),%[_from],%[_size]\n" \ - "1: xr %[rc],%[rc]\n" \ + " lhi %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: lhi %[rc],0\n" \ "2:\n" \ - EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \ - EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \ - : [rc] "=&d" (rc), "=Q" (*(to)) \ - : [_size] "d" (size), [_from] "Q" (*(from)), \ - [spec] "d" (__oac_spec.val), [_to] "a" (to), \ - [_ksize] "K" (size) \ + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ + : [rc] "=d" (rc), [to] "=Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ : "cc", "0"); \ + if (likely(!rc)) \ + return 0; \ + *to = 0; \ return rc; \ -} \ - \ +} + +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +DEFINE_GET_USER_NOINSTR(char); +DEFINE_GET_USER_NOINSTR(short); +DEFINE_GET_USER_NOINSTR(int); +DEFINE_GET_USER_NOINSTR(long); + +#define DEFINE_GET_USER(type) \ static __always_inline int \ -__get_user_##type(unsigned type *to, unsigned type __user *from, \ 
+__get_user_##type(unsigned type *to, const unsigned type __user *from, \ unsigned long size) \ { \ int rc; \ @@ -205,107 +356,50 @@ DEFINE_GET_USER(short); DEFINE_GET_USER(int); DEFINE_GET_USER(long); -static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) -{ - int rc; - - switch (size) { - case 1: - rc = __get_user_char((unsigned char *)x, - (unsigned char __user *)ptr, - size); - break; - case 2: - rc = __get_user_short((unsigned short *)x, - (unsigned short __user *)ptr, - size); - break; - case 4: - rc = __get_user_int((unsigned int *)x, - (unsigned int __user *)ptr, - size); - break; - case 8: - rc = __get_user_long((unsigned long *)x, - (unsigned long __user *)ptr, - size); - break; - default: - __get_user_bad(); - break; - } - return rc; -} - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - */ -#define __put_user(x, ptr) \ -({ \ - __typeof__(*(ptr)) __x = (x); \ - int __pu_err = -EFAULT; \ - \ - __chk_user_ptr(ptr); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - case 2: \ - case 4: \ - case 8: \ - __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \ - break; \ - default: \ - __put_user_bad(); \ - break; \ - } \ - __builtin_expect(__pu_err, 0); \ -}) - -#define put_user(x, ptr) \ -({ \ - might_fault(); \ - __put_user(x, ptr); \ -}) - #define __get_user(x, ptr) \ ({ \ - int __gu_err = -EFAULT; \ + const __user void *____guptr = (ptr); \ + int __grc; \ \ __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: { \ + const unsigned char __user *__guptr = ____guptr; \ unsigned char __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_char(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 2: { \ + const unsigned short __user *__guptr = ____guptr; \ unsigned short __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_short(&__x, __guptr, sizeof(*(ptr)));\ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 4: { \ + const unsigned int __user *__guptr = ____guptr; \ unsigned int __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_int(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 8: { \ + const unsigned long __user *__guptr = ____guptr; \ unsigned long __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_long(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ default: \ - __get_user_bad(); \ + __grc = __get_user_bad(); \ break; \ } \ - __builtin_expect(__gu_err, 0); \ + __builtin_expect(__grc, 0); \ }) #define get_user(x, ptr) \ @@ -341,109 +435,71 @@ static inline void *s390_kernel_write(void *dst, const void *src, size_t size) return __s390_kernel_write(dst, src, size); } -int __noreturn __put_kernel_bad(void); +void __noreturn __mvc_kernel_nofault_bad(void); -#define __put_kernel_asm(val, to, insn) \ -({ \ - int __rc; \ - \ - asm volatile( \ - "0: " insn " %[_val],%[_to]\n" \ - "1: xr %[rc],%[rc]\n" \ - "2:\n" \ - EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ - EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ - : [rc] "=d" (__rc), [_to] "+Q" (*(to)) \ - : [_val] "d" (val) \ - : "cc"); \ - __rc; \ -}) +#if defined(CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && defined(CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS) -#define __put_kernel_nofault(dst, src, type, err_label) \ +#define __mvc_kernel_nofault(dst, src, type, 
err_label) \ do { \ - unsigned long __x = (unsigned long)(*((type *)(src))); \ - int __pk_err; \ - \ switch (sizeof(type)) { \ case 1: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \ - break; \ case 2: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \ - break; \ case 4: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \ - break; \ case 8: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \ + asm goto( \ + "0: mvc %O[_dst](%[_len],%R[_dst]),%[_src]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[err_label]) \ + EX_TABLE(1b, %l[err_label]) \ + : [_dst] "=Q" (*(type *)dst) \ + : [_src] "Q" (*(type *)(src)), \ + [_len] "I" (sizeof(type)) \ + : \ + : err_label); \ break; \ default: \ - __pk_err = __put_kernel_bad(); \ + __mvc_kernel_nofault_bad(); \ break; \ } \ - if (unlikely(__pk_err)) \ - goto err_label; \ } while (0) -int __noreturn __get_kernel_bad(void); - -#define __get_kernel_asm(val, from, insn) \ -({ \ - int __rc; \ - \ - asm volatile( \ - "0: " insn " %[_val],%[_from]\n" \ - "1: xr %[rc],%[rc]\n" \ - "2:\n" \ - EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val]) \ - EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val]) \ - : [rc] "=d" (__rc), [_val] "=d" (val) \ - : [_from] "Q" (*(from)) \ - : "cc"); \ - __rc; \ -}) +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define __mvc_kernel_nofault(dst, src, type, err_label) \ do { \ - int __gk_err; \ + type *(__dst) = (type *)(dst); \ + int __rc; \ \ switch (sizeof(type)) { \ - case 1: { \ - unsigned char __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 2: { \ - unsigned short __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 4: { \ - unsigned int __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 8: { \ - unsigned long __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \ - *((type *)(dst)) = (type)__x; \ + case 1: \ + case 2: \ + case 4: \ + case 8: \ + asm_inline volatile( \ + "0: mvc 0(%[_len],%[_dst]),%[_src]\n" \ + "1: lhi %[_rc],0\n" \ + "2:\n" \ + EX_TABLE_UA_FAULT(0b, 2b, %[_rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[_rc]) \ + : [_rc] "=d" (__rc), \ + "=m" (*__dst) \ + : [_src] "Q" (*(type *)(src)), \ + [_dst] "a" (__dst), \ + [_len] "I" (sizeof(type))); \ + if (__rc) \ + goto err_label; \ break; \ - }; \ default: \ - __gk_err = __get_kernel_bad(); \ + __mvc_kernel_nofault_bad(); \ break; \ } \ - if (unlikely(__gk_err)) \ - goto err_label; \ } while (0) +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ + +#define __get_kernel_nofault __mvc_kernel_nofault +#define __put_kernel_nofault __mvc_kernel_nofault + void __cmpxchg_user_key_called_with_bad_pointer(void); #define CMPXCHG_USER_KEY_MAX_LOOPS 128 diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index ba6b7329a10e..ce038e9205f7 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1122,7 +1122,7 @@ static int s390dbf_procactive(const struct ctl_table *table, int write, return 0; } -static struct ctl_table s390dbf_table[] = { +static const struct ctl_table s390dbf_table[] = { { .procname = "debug_stoppable", .data = &debug_stoppable, diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 62f8f5a750a3..2fa25164df7d 100644 --- 
a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -50,6 +50,7 @@ decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); decompressor_handled_param(relocate_lowcore); +decompressor_handled_param(bootdebug); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -58,7 +59,7 @@ static void __init kasan_early_init(void) { #ifdef CONFIG_KASAN init_task.kasan_depth = 0; - sclp_early_printk("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized\n"); #endif } diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index c0b2c97efefb..63ba6306632e 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -266,18 +266,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14]; - int bit; if (unlikely(ftrace_graph_is_dead())) return; if (unlikely(atomic_read(&current->tracing_graph_pause))) return; - bit = ftrace_test_recursion_trylock(ip, *parent); - if (bit < 0) - return; if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs)) *parent = (unsigned long)&return_to_handler; - ftrace_test_recursion_unlock(bit); } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c index 2a99a216ab62..7857a7e8e56c 100644 --- a/arch/s390/kernel/hiperdispatch.c +++ b/arch/s390/kernel/hiperdispatch.c @@ -292,7 +292,7 @@ static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write, return 0; } -static struct ctl_table hiperdispatch_ctl_table[] = { +static const struct ctl_table hiperdispatch_ctl_table[] = { { .procname = "hiperdispatch", .mode = 0644, diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 1298f0860733..d78bcfe707b5 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -157,6 +157,12 @@ u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); struct oldmem_data __bootdata_preserved(oldmem_data); +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + unsigned long __bootdata_preserved(VMALLOC_START); EXPORT_SYMBOL(VMALLOC_START); @@ -686,7 +692,7 @@ static void __init reserve_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_reserve(addr, size); } @@ -694,7 +700,7 @@ static void __init free_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_phys_free(addr, size); } @@ -724,7 +730,7 @@ static void __init reserve_lowcore(void) void *lowcore_end = lowcore_start + sizeof(struct lowcore); void *start, *end; - if ((void *)__identity_base < lowcore_end) { + if (absolute_pointer(__identity_base) < lowcore_end) { start = max(lowcore_start, (void *)__identity_base); end = min(lowcore_end, (void *)(__identity_base + ident_map_size)); memblock_reserve(__pa(start), __pa(end)); @@ -866,6 +872,23 @@ static void __init log_component_list(void) } /* + * Print avoiding interpretation of % in buf and taking bootdebug option + * into consideration. 
+ */ +static void __init print_rb_entry(const char *buf) +{ + char fmt[] = KERN_SOH "0boot: %s"; + int level = printk_get_level(buf); + + buf = skip_timestamp(printk_skip_level(buf)); + if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf))) + return; + + fmt[1] = level; + printk(fmt, buf); +} + +/* * Setup function called from init/main.c just after the banner * was printed. */ @@ -884,6 +907,9 @@ void __init setup_arch(char **cmdline_p) pr_info("Linux is running natively in 64-bit mode\n"); else pr_info("Linux is running as a guest in 64-bit mode\n"); + /* Print decompressor messages if not already printed */ + if (!boot_earlyprintk) + boot_rb_foreach(print_rb_entry); if (have_relocated_lowcore()) pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); @@ -987,3 +1013,8 @@ void __init setup_arch(char **cmdline_p) /* Add system specific data to the random pool */ setup_randomness(); } + +void __init arch_cpu_finalize_init(void) +{ + sclp_init(); +} diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 3808f942a433..211cc8382e4a 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -667,7 +667,7 @@ static int polarization_ctl_handler(const struct ctl_table *ctl, int write, return set_polarization(polarization); } -static struct ctl_table topology_ctl_table[] = { +static const struct ctl_table topology_ctl_table[] = { { .procname = "topology", .mode = 0644, diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 377b9aaf8c92..ff1ddba96352 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -52,7 +52,6 @@ SECTIONS SOFTIRQENTRY_TEXT FTRACE_HOTPATCH_TRAMPOLINES_TEXT *(.text.*_indirect_*) - *(.fixup) *(.gnu.warning) . = ALIGN(PAGE_SIZE); _etext = .; /* End of text section */ diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index c7c269d5c491..f977b7c37efc 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -31,51 +31,6 @@ void debug_user_asce(int exit) } #endif /*CONFIG_DEBUG_ENTRY */ -static unsigned long raw_copy_from_user_key(void *to, const void __user *from, - unsigned long size, unsigned long key) -{ - unsigned long rem; - union oac spec = { - .oac2.key = key, - .oac2.as = PSW_BITS_AS_SECONDARY, - .oac2.k = 1, - .oac2.a = 1, - }; - - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[from],%[val]\n" - " slgr %[to],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */ - " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */ - " slgr %[rem],%[from]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? 
*/ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} - -unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - return raw_copy_from_user_key(to, from, n, 0); -} -EXPORT_SYMBOL(raw_copy_from_user); - unsigned long _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) { @@ -93,51 +48,6 @@ unsigned long _copy_from_user_key(void *to, const void __user *from, } EXPORT_SYMBOL(_copy_from_user_key); -static unsigned long raw_copy_to_user_key(void __user *to, const void *from, - unsigned long size, unsigned long key) -{ - unsigned long rem; - union oac spec = { - .oac1.key = key, - .oac1.as = PSW_BITS_AS_SECONDARY, - .oac1.k = 1, - .oac1.a = 1, - }; - - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[to],%[val]\n" - " slgr %[from],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ - " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ - " slgr %[rem],%[to]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} - -unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) -{ - return raw_copy_to_user_key(to, from, n, 0); -} -EXPORT_SYMBOL(raw_copy_to_user); - unsigned long _copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) { diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 7bf0f691827b..39f44b6256e0 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -332,7 +332,7 @@ static int cmm_timeout_handler(const struct ctl_table *ctl, int write, return 0; } -static struct ctl_table cmm_table[] = { +static const struct ctl_table cmm_table[] = { { .procname = "cmm_pages", .mode = 0644, diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 0a0738a473af..a046be1715cf 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -7,6 +7,7 @@ #include <linux/panic.h> #include <asm/asm-extable.h> #include <asm/extable.h> +#include <asm/fpu.h> const struct exception_table_entry *s390_search_extables(unsigned long addr) { @@ -26,7 +27,7 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r return true; } -static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs) +static bool ex_handler_ua_fault(const struct exception_table_entry *ex, struct pt_regs *regs) { unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); @@ -35,18 +36,6 @@ static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct p return true; } -static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs) -{ - unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data); - unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); - 
size_t len = FIELD_GET(EX_DATA_LEN, ex->data); - - regs->gprs[reg_err] = -EFAULT; - memset((void *)regs->gprs[reg_addr], 0, len); - regs->psw.addr = extable_fixup(ex); - return true; -} - static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, bool pair, struct pt_regs *regs) { @@ -77,6 +66,13 @@ static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt return true; } +static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + fpu_sfpc(0); + regs->psw.addr = extable_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -89,16 +85,16 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_fixup(ex, regs); case EX_TYPE_BPF: return ex_handler_bpf(ex, regs); - case EX_TYPE_UA_STORE: - return ex_handler_ua_store(ex, regs); - case EX_TYPE_UA_LOAD_MEM: - return ex_handler_ua_load_mem(ex, regs); + case EX_TYPE_UA_FAULT: + return ex_handler_ua_fault(ex, regs); case EX_TYPE_UA_LOAD_REG: return ex_handler_ua_load_reg(ex, false, regs); case EX_TYPE_UA_LOAD_REGPAIR: return ex_handler_ua_load_reg(ex, true, regs); case EX_TYPE_ZEROPAD: return ex_handler_zeropad(ex, regs); + case EX_TYPE_FPC: + return ex_handler_fpc(ex, regs); } panic("invalid exception table entry"); } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index d33f55b7ee98..cd2fef79ad2c 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -21,7 +21,7 @@ int page_table_allocate_pgste = 0; EXPORT_SYMBOL(page_table_allocate_pgste); -static struct ctl_table page_table_sysctl[] = { +static const struct ctl_table page_table_sysctl[] = { { .procname = "allocate_pgste", .data = &page_table_allocate_pgste, diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 7c684c54e721..8ead999e340b 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -662,7 +662,7 @@ void __init vmem_map_init(void) if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); if (debug_pagealloc_enabled()) - __set_memory_4k(__va(0), __va(0) + ident_map_size); + __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index d5ace00d10f0..857afbc4828f 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -171,7 +171,6 @@ void zpci_bus_scan_busses(void) static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev) { return !s390_pci_no_rid && zdev->rid_available && - zpci_is_device_configured(zdev) && !zdev->vfn; } diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 24eccaa29337..bdcf2a3b6c41 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -13,7 +13,7 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes +KBUILD_CFLAGS := -std=gnu11 -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c index a1bc02b29c81..7d76c417f83f 100644 --- a/arch/s390/tools/gen_opcode_table.c +++ b/arch/s390/tools/gen_opcode_table.c @@ -201,6 +201,17 @@ static int 
cmp_long_insn(const void *a, const void *b) return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name); } +static void print_insn_name(const char *name) +{ + size_t i, len; + + len = strlen(name); + printf("{"); + for (i = 0; i < len; i++) + printf(" \'%c\',", name[i]); + printf(" }"); +} + static void print_long_insn(struct gen_opcode *desc) { struct insn *insn; @@ -223,7 +234,9 @@ static void print_long_insn(struct gen_opcode *desc) insn = &desc->insn[i]; if (insn->name_len < 6) continue; - printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name); + printf("\t[LONG_INSN_%s] = ", insn->upper); + print_insn_name(insn->name); + printf(", \\\n"); } printf("}\n\n"); } @@ -236,11 +249,13 @@ static void print_opcode(struct insn *insn, int nr) if (insn->type->byte != 0) opcode += 2; printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format); - if (insn->name_len < 6) - printf(".name = \"%s\" ", insn->name); - else - printf(".offset = LONG_INSN_%s ", insn->upper); - printf("}, \\\n"); + if (insn->name_len < 6) { + printf(".name = "); + print_insn_name(insn->name); + } else { + printf(".offset = LONG_INSN_%s", insn->upper); + } + printf(" }, \\\n"); } static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset) diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index 07933d75ac81..1a1a9d6b8f2e 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -419,13 +419,13 @@ struct vio_remove_node_data { u64 node; }; -static int vio_md_node_match(struct device *dev, void *arg) +static int vio_md_node_match(struct device *dev, const void *arg) { struct vio_dev *vdev = to_vio_dev(dev); - struct vio_remove_node_data *node_data; + const struct vio_remove_node_data *node_data; u64 node; - node_data = (struct vio_remove_node_data *)arg; + node_data = (const struct vio_remove_node_data *)arg; node = vio_vdev_node(node_data->hp, vdev); diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c index 134a58f93c85..9158c936c128 100644 --- a/arch/um/drivers/rtc_kern.c +++ b/arch/um/drivers/rtc_kern.c @@ -51,6 +51,7 @@ static int uml_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) { + struct timespec64 ts; unsigned long long secs; if (!enable && !uml_rtc_alarm_enabled) @@ -58,7 +59,8 @@ static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) uml_rtc_alarm_enabled = enable; - secs = uml_rtc_alarm_time - ktime_get_real_seconds(); + read_persistent_clock64(&ts); + secs = uml_rtc_alarm_time - ts.tv_sec; if (time_travel_mode == TT_MODE_OFF) { if (!enable) { @@ -73,7 +75,8 @@ static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) if (enable) time_travel_add_event_rel(&uml_rtc_alarm_event, - secs * NSEC_PER_SEC); + secs * NSEC_PER_SEC - + ts.tv_nsec); } return 0; } diff --git a/arch/um/include/asm/fixmap.h b/arch/um/include/asm/fixmap.h deleted file mode 100644 index 2efac5827188..000000000000 --- a/arch/um/include/asm/fixmap.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_FIXMAP_H -#define __UM_FIXMAP_H - -#include <asm/processor.h> -#include <asm/archparam.h> -#include <asm/page.h> -#include <linux/threads.h> - -/* - * Here we define all the compile-time 'special' virtual - * addresses. The point is to have a constant address at - * compile time, but to set the physical address only - * in the boot process. 
We allocate these special addresses - * from the end of virtual memory (0xfffff000) backwards. - * Also this lets us do fail-safe vmalloc(), we - * can guarantee that these special addresses and - * vmalloc()-ed addresses never overlap. - * - * these 'compile-time allocated' memory buffers are - * fixed-size 4k pages. (or larger if used with an increment - * highger than 1) use fixmap_set(idx,phys) to associate - * physical memory with fixmap indices. - * - * TLB entries of such buffers will not be flushed across - * task switches. - */ - -/* - * on UP currently we will have no trace of the fixmap mechanizm, - * no page table allocations, etc. This might change in the - * future, say framebuffers for the console driver(s) could be - * fix-mapped? - */ -enum fixed_addresses { - __end_of_fixed_addresses -}; - -extern void __set_fixmap (enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); - -/* - * used by vmalloc.c. - * - * Leave one empty page between vmalloc'ed areas and - * the start of the fixmap, and leave one page empty - * at the top of mem.. - */ - -#define FIXADDR_TOP (TASK_SIZE - 2 * PAGE_SIZE) -#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - -#include <asm-generic/fixmap.h> - -#endif diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 0bd60afcc37d..5601ca98e8a6 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -8,7 +8,8 @@ #ifndef __UM_PGTABLE_H #define __UM_PGTABLE_H -#include <asm/fixmap.h> +#include <asm/page.h> +#include <linux/mm_types.h> #define _PAGE_PRESENT 0x001 #define _PAGE_NEEDSYNC 0x002 @@ -48,11 +49,9 @@ extern unsigned long end_iomem; #define VMALLOC_OFFSET (__va_space) #define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) -#define PKMAP_BASE ((FIXADDR_START - LAST_PKMAP * PAGE_SIZE) & PMD_MASK) -#define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) +#define VMALLOC_END (TASK_SIZE-2*PAGE_SIZE) #define MODULES_VADDR VMALLOC_START #define MODULES_END VMALLOC_END -#define MODULES_LEN (MODULES_VADDR - MODULES_END) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index d98812907493..befed230aac2 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -9,7 +9,6 @@ #include <linux/mm.h> #include <linux/swap.h> #include <linux/slab.h> -#include <asm/fixmap.h> #include <asm/page.h> #include <asm/pgalloc.h> #include <as-layout.h> @@ -74,6 +73,7 @@ void __init mem_init(void) kmalloc_ok = 1; } +#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) /* * Create a page table and place a pointer to it in a middle page * directory entry. 
@@ -152,7 +152,6 @@ static void __init fixrange_init(unsigned long start, unsigned long end, static void __init fixaddr_user_init( void) { -#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA long size = FIXADDR_USER_END - FIXADDR_USER_START; pte_t *pte; phys_t p; @@ -174,13 +173,12 @@ static void __init fixaddr_user_init( void) pte = virt_to_kpte(vaddr); pte_set_val(*pte, p, PAGE_READONLY); } -#endif } +#endif void __init paging_init(void) { unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 }; - unsigned long vaddr; empty_zero_page = (unsigned long *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); @@ -191,14 +189,9 @@ void __init paging_init(void) max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT; free_area_init(max_zone_pfn); - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir); - +#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) fixaddr_user_init(); +#endif } /* diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 30bdc0a87dc8..e5a2d4d897e0 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -213,14 +213,6 @@ int __uml_cant_sleep(void) { /* Is in_interrupt() really needed? */ } -int user_context(unsigned long sp) -{ - unsigned long stack; - - stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER); - return stack != (unsigned long) current_thread_info(); -} - extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; void do_uml_exitcalls(void) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 8037a967225d..79ea97d4797e 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -264,7 +264,7 @@ EXPORT_SYMBOL(end_iomem); #define MIN_VMALLOC (32 * 1024 * 1024) -static void parse_host_cpu_flags(char *line) +static void __init parse_host_cpu_flags(char *line) { int i; for (i = 0; i < 32*NCAPINTS; i++) { @@ -272,7 +272,8 @@ static void parse_host_cpu_flags(char *line) set_cpu_cap(&boot_cpu_data, i); } } -static void parse_cache_line(char *line) + +static void __init parse_cache_line(char *line) { long res; char *to_parse = strstr(line, ":"); @@ -288,7 +289,7 @@ static void parse_cache_line(char *line) } } -static unsigned long get_top_address(char **envp) +static unsigned long __init get_top_address(char **envp) { unsigned long top_addr = (unsigned long) &top_addr; int i; @@ -376,9 +377,8 @@ int __init linux_main(int argc, char **argv, char **envp) iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; - - if (physmem_size + iomem_size > max_physmem) { - physmem_size = max_physmem - iomem_size; + if (physmem_size > max_physmem) { + physmem_size = max_physmem; os_info("Physical memory size shrunk to %llu bytes\n", physmem_size); } diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 0afcdeb8995b..3c63ce19e3bf 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -19,13 +19,11 @@ #include <um_malloc.h> #include "internal.h" -#define PGD_BOUND (4 * 1024 * 1024) #define STACKSIZE (8 * 1024 * 1024) -#define THREAD_NAME_LEN (256) long elf_aux_hwcap; -static void set_stklim(void) +static void __init set_stklim(void) { struct rlimit lim; @@ -48,7 +46,7 @@ static void last_ditch_exit(int sig) exit(1); } -static void install_fatal_handler(int sig) +static void __init install_fatal_handler(int sig) { struct sigaction action; @@ -73,7 +71,7 @@ static void 
install_fatal_handler(int sig) #define UML_LIB_PATH ":" OS_LIB_PATH "/uml" -static void setup_env_path(void) +static void __init setup_env_path(void) { char *new_path = NULL; char *old_path = NULL; diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink index fef2e977cc7d..8b8a68162c94 100644 --- a/arch/x86/Makefile.postlink +++ b/arch/x86/Makefile.postlink @@ -11,6 +11,7 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib CMD_RELOCS = arch/x86/tools/relocs OUT_RELOCS = arch/x86/boot/compressed @@ -20,11 +21,6 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/$@.relocs $(CMD_RELOCS) $@ > $(OUT_RELOCS)/$@.relocs; \ $(CMD_RELOCS) --abs-relocs $@ -quiet_cmd_strip_relocs = RSTRIP $@ - cmd_strip_relocs = \ - $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ - --remove-section='.rela.*' --remove-section='.rela__*' $@ - # `@true` prevents complaint when there is nothing to be done vmlinux: FORCE diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile index 08de37559307..dcb06dc8b5ae 100644 --- a/arch/x86/coco/sev/Makefile +++ b/arch/x86/coco/sev/Makefile @@ -2,6 +2,10 @@ obj-y += core.o +# jump tables are emitted using absolute references in non-PIC code +# so they cannot be used in the early SEV startup code +CFLAGS_core.o += -fno-jump-tables + ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_core.o = -pg endif diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 76e4e74f35b5..f6d2d8aba643 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -57,7 +57,7 @@ __setup_param("vdso=", vdso_setup, vdso32_setup, 0); /* Register vsyscall32 into the ABI table */ #include <linux/sysctl.h> -static struct ctl_table abi_table2[] = { +static const struct ctl_table abi_table2[] = { { .procname = "vsyscall32", .data = &vdso32_enabled, diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index ab9f3dd87c80..ab0c78855ecb 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -84,7 +84,6 @@ extern int hpet_set_rtc_irq_bit(unsigned long bit_mask); extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec); extern int hpet_set_periodic_freq(unsigned long freq); -extern int hpet_rtc_dropped_irq(void); extern int hpet_rtc_timer_init(void); extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id); extern int hpet_register_irq_handler(rtc_irq_handler handler); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index ce4677b8b735..3b496cdcb74b 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -37,6 +37,8 @@ typedef struct { */ atomic64_t tlb_gen; + unsigned long next_trim_cpumask; + #ifdef CONFIG_MODIFY_LDT_SYSCALL struct rw_semaphore ldt_usr_sem; struct ldt_struct *ldt; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 2886cb668d7f..795fdd53bd0a 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -151,6 +151,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); + mm->context.next_trim_cpumask = jiffies + HZ; #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 69e79fff41b8..02fc2aa06e9e 100644 --- 
a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -222,6 +222,7 @@ struct flush_tlb_info { unsigned int initiating_cpu; u8 stride_shift; u8 freed_tables; + u8 trim_cpumask; }; void flush_tlb_local(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7c15d6e83c37..dae6a73be40e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -227,6 +227,28 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) } static int __init +acpi_check_lapic(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_local_apic *processor = NULL; + + processor = (struct acpi_madt_local_apic *)header; + + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + /* Ignore invalid ID */ + if (processor->id == 0xff) + return 0; + + /* Ignore processors that can not be onlined */ + if (!acpi_is_processor_usable(processor->lapic_flags)) + return 0; + + has_lapic_cpus = true; + return 0; +} + +static int __init acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic *processor = NULL; @@ -257,7 +279,6 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); - has_lapic_cpus = true; return 0; } @@ -1026,6 +1047,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) static int __init acpi_parse_madt_lapic_entries(void) { int count, x2count = 0; + struct acpi_subtable_proc madt_proc[2]; + int ret; if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; @@ -1034,10 +1057,27 @@ static int __init acpi_parse_madt_lapic_entries(void) acpi_parse_sapic, MAX_LOCAL_APIC); if (!count) { - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, - acpi_parse_lapic, MAX_LOCAL_APIC); - x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, - acpi_parse_x2apic, MAX_LOCAL_APIC); + /* Check if there are valid LAPIC entries */ + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_check_lapic, MAX_LOCAL_APIC); + + /* + * Enumerate the APIC IDs in the order that they appear in the + * MADT, no matter LAPIC entry or x2APIC entry is used. 
+ */ + memset(madt_proc, 0, sizeof(madt_proc)); + madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; + madt_proc[0].handler = acpi_parse_lapic; + madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC; + madt_proc[1].handler = acpi_parse_x2apic; + ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, + sizeof(struct acpi_table_madt), + madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC); + if (ret < 0) { + pr_err("Error parsing LAPIC/X2APIC entries\n"); + return ret; + } + count = madt_proc[0].count; + x2count = madt_proc[1].count; } if (!count && !x2count) { pr_err("No LAPIC entries present\n"); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 243843e44e89..c71b575bf229 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1854,11 +1854,18 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm) return temp_state; } +__ro_after_init struct mm_struct *poking_mm; +__ro_after_init unsigned long poking_addr; + static inline void unuse_temporary_mm(temp_mm_state_t prev_state) { lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(NULL, prev_state.mm, current); + /* Clear the cpumask, to indicate no TLB flushing is needed anywhere */ + cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(poking_mm)); + /* * Restore the breakpoints if they were disabled before the temporary mm * was loaded. @@ -1867,9 +1874,6 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state) hw_breakpoint_restore(); } -__ro_after_init struct mm_struct *poking_mm; -__ro_after_init unsigned long poking_addr; - static void text_poke_memcpy(void *dst, const void *src, size_t len) { memcpy(dst, src, len); diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c index 704e9241b964..6cba85c79d42 100644 --- a/arch/x86/kernel/cpu/bus_lock.c +++ b/arch/x86/kernel/cpu/bus_lock.c @@ -49,7 +49,7 @@ static unsigned int sysctl_sld_mitigate = 1; static DEFINE_SEMAPHORE(buslock_sem, 1); #ifdef CONFIG_PROC_SYSCTL -static struct ctl_table sld_sysctls[] = { +static const struct ctl_table sld_sysctls[] = { { .procname = "split_lock_mitigate", .data = &sysctl_sld_mitigate, diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 9182303a50b0..7f4b2966e15c 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1382,12 +1382,6 @@ int hpet_set_periodic_freq(unsigned long freq) } EXPORT_SYMBOL_GPL(hpet_set_periodic_freq); -int hpet_rtc_dropped_irq(void) -{ - return is_hpet_enabled(); -} -EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); - static void hpet_rtc_timer_reinit(void) { unsigned int delta; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ac52255fab01..296d294142c8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -7,7 +7,6 @@ #include <linux/sched.h> /* test_thread_flag(), ... */ #include <linux/sched/task_stack.h> /* task_stack_*(), ... */ #include <linux/kdebug.h> /* oops_begin/end, ... */ -#include <linux/extable.h> /* search_exception_tables */ #include <linux/memblock.h> /* max_low_pfn */ #include <linux/kfence.h> /* kfence_handle_page_fault */ #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a2becb85bea7..6cf881a942bb 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -607,18 +607,15 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, cond_mitigation(tsk); /* - * Stop remote flushes for the previous mm. 
- * Skip kernel threads; we never send init_mm TLB flushing IPIs, - * but the bitmap manipulation can cause cache line contention. + * Leave this CPU in prev's mm_cpumask. Atomic writes to + * mm_cpumask can be expensive under contention. The CPU + * will be removed lazily at TLB flush time. */ - if (prev != &init_mm) { - VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, - mm_cpumask(prev))); - cpumask_clear_cpu(cpu, mm_cpumask(prev)); - } + VM_WARN_ON_ONCE(prev != &init_mm && !cpumask_test_cpu(cpu, + mm_cpumask(prev))); /* Start receiving IPIs and then read tlb_gen (and LAM below) */ - if (next != &init_mm) + if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) cpumask_set_cpu(cpu, mm_cpumask(next)); next_tlb_gen = atomic64_read(&next->context.tlb_gen); @@ -760,10 +757,13 @@ static void flush_tlb_func(void *info) if (!local) { inc_irq_stat(irq_tlb_count); count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + } - /* Can only happen on remote CPUs */ - if (f->mm && f->mm != loaded_mm) - return; + /* The CPU was left in the mm_cpumask of the target mm. Clear it. */ + if (f->mm && f->mm != loaded_mm) { + cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm)); + trace_tlb_flush(TLB_REMOTE_WRONG_CPU, 0); + return; } if (unlikely(loaded_mm == &init_mm)) @@ -893,9 +893,36 @@ done: nr_invalidate); } -static bool tlb_is_not_lazy(int cpu, void *data) +static bool should_flush_tlb(int cpu, void *data) +{ + struct flush_tlb_info *info = data; + + /* Lazy TLB will get flushed at the next context switch. */ + if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) + return false; + + /* No mm means kernel memory flush. */ + if (!info->mm) + return true; + + /* The target mm is loaded, and the CPU is not lazy. */ + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm) + return true; + + /* In cpumask, but not the loaded mm? Periodically remove by flushing. */ + if (info->trim_cpumask) + return true; + + return false; +} + +static bool should_trim_cpumask(struct mm_struct *mm) { - return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu); + if (time_after(jiffies, READ_ONCE(mm->context.next_trim_cpumask))) { + WRITE_ONCE(mm->context.next_trim_cpumask, jiffies + HZ); + return true; + } + return false; } DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); @@ -929,7 +956,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, if (info->freed_tables) on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else - on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, + on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func, (void *)info, 1, cpumask); } @@ -980,6 +1007,7 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, info->freed_tables = freed_tables; info->new_tlb_gen = new_tlb_gen; info->initiating_cpu = smp_processor_id(); + info->trim_cpumask = 0; return info; } @@ -1022,6 +1050,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * flush_tlb_func_local() directly in this case. */ if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { + info->trim_cpumask = should_trim_cpumask(mm); flush_tlb_multi(mm_cpumask(mm), info); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); diff --git a/arch/x86/um/asm/archparam.h b/arch/x86/um/asm/archparam.h deleted file mode 100644 index c17cf68dda0f..000000000000 --- a/arch/x86/um/asm/archparam.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) - * Copyright 2003 PathScale, Inc. 
- * Licensed under the GPL - */ - -#ifndef __UM_ARCHPARAM_H -#define __UM_ARCHPARAM_H - -#ifdef CONFIG_X86_32 - -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif - -#endif - -#endif diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h index 2dd4ca6713f8..8f7476ff6e95 100644 --- a/arch/x86/um/shared/sysdep/ptrace.h +++ b/arch/x86/um/shared/sysdep/ptrace.h @@ -74,8 +74,6 @@ struct uml_pt_regs { #define UPT_FAULTINFO(r) (&(r)->faultinfo) #define UPT_IS_USER(r) ((r)->is_user) -extern int user_context(unsigned long sp); - extern int arch_init_registers(int pid); #endif /* __SYSDEP_X86_PTRACE_H */ diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 55a4996d0c04..2c70cd35e72c 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -781,6 +781,7 @@ void xen_mm_pin_all(void) { struct page *page; + spin_lock(&init_mm.page_table_lock); spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { @@ -791,6 +792,7 @@ void xen_mm_pin_all(void) } spin_unlock(&pgd_lock); + spin_unlock(&init_mm.page_table_lock); } static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page, @@ -887,6 +889,7 @@ void xen_mm_unpin_all(void) { struct page *page; + spin_lock(&init_mm.page_table_lock); spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { @@ -898,6 +901,7 @@ void xen_mm_unpin_all(void) } spin_unlock(&pgd_lock); + spin_unlock(&init_mm.page_table_lock); } static void xen_enter_mmap(struct mm_struct *mm) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 45a395862fbc..9ed93d91d754 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1138,6 +1138,7 @@ static void blkcg_fill_root_iostats(void) blkg_iostat_set(&blkg->iostat.cur, &tmp); u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } + class_dev_iter_exit(&iter); } static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) @@ -1545,6 +1546,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) struct request_queue *q = disk->queue; struct blkg_policy_data *pd_prealloc = NULL; struct blkcg_gq *blkg, *pinned_blkg = NULL; + unsigned int memflags; int ret; if (blkcg_policy_enabled(q, pol)) @@ -1559,7 +1561,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) return -EINVAL; if (queue_is_mq(q)) - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); retry: spin_lock_irq(&q->queue_lock); @@ -1623,7 +1625,7 @@ retry: spin_unlock_irq(&q->queue_lock); out: if (queue_is_mq(q)) - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (pinned_blkg) blkg_put(pinned_blkg); if (pd_prealloc) @@ -1667,12 +1669,13 @@ void blkcg_deactivate_policy(struct gendisk *disk, { struct request_queue *q = disk->queue; struct blkcg_gq *blkg; + unsigned int memflags; if (!blkcg_policy_enabled(q, pol)) return; if (queue_is_mq(q)) - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); mutex_lock(&q->blkcg_mutex); spin_lock_irq(&q->queue_lock); @@ -1696,7 +1699,7 @@ void blkcg_deactivate_policy(struct gendisk *disk, mutex_unlock(&q->blkcg_mutex); if (queue_is_mq(q)) - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); diff --git a/block/blk-core.c b/block/blk-core.c index 32fb28a6372c..d6c4fa3943b5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -430,7 +430,6 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id) refcount_set(&q->refs, 1); 
mutex_init(&q->debugfs_mutex); mutex_init(&q->sysfs_lock); - mutex_init(&q->sysfs_dir_lock); mutex_init(&q->limits_lock); mutex_init(&q->rq_qos_mutex); spin_lock_init(&q->queue_lock); diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index c9eb4241e048..d479f5481b66 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -111,7 +111,6 @@ int disk_register_independent_access_ranges(struct gendisk *disk) struct request_queue *q = disk->queue; int i, ret; - lockdep_assert_held(&q->sysfs_dir_lock); lockdep_assert_held(&q->sysfs_lock); if (!iars) @@ -155,7 +154,6 @@ void disk_unregister_independent_access_ranges(struct gendisk *disk) struct blk_independent_access_ranges *iars = disk->ia_ranges; int i; - lockdep_assert_held(&q->sysfs_dir_lock); lockdep_assert_held(&q->sysfs_lock); if (!iars) @@ -289,7 +287,6 @@ void disk_set_independent_access_ranges(struct gendisk *disk, { struct request_queue *q = disk->queue; - mutex_lock(&q->sysfs_dir_lock); mutex_lock(&q->sysfs_lock); if (iars && !disk_check_ia_ranges(disk, iars)) { kfree(iars); @@ -313,6 +310,5 @@ void disk_set_independent_access_ranges(struct gendisk *disk, disk_register_independent_access_ranges(disk); unlock: mutex_unlock(&q->sysfs_lock); - mutex_unlock(&q->sysfs_dir_lock); } EXPORT_SYMBOL_GPL(disk_set_independent_access_ranges); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index a5894ec9696e..65a1d4427ccf 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3224,6 +3224,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, u32 qos[NR_QOS_PARAMS]; bool enable, user; char *body, *p; + unsigned int memflags; int ret; blkg_conf_init(&ctx, input); @@ -3247,7 +3248,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, ioc = q_to_ioc(disk->queue); } - blk_mq_freeze_queue(disk->queue); + memflags = blk_mq_freeze_queue(disk->queue); blk_mq_quiesce_queue(disk->queue); spin_lock_irq(&ioc->lock); @@ -3347,7 +3348,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, wbt_enable_default(disk); blk_mq_unquiesce_queue(disk->queue); - blk_mq_unfreeze_queue(disk->queue); + blk_mq_unfreeze_queue(disk->queue, memflags); blkg_conf_exit(&ctx); return nbytes; @@ -3355,7 +3356,7 @@ einval: spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(disk->queue); - blk_mq_unfreeze_queue(disk->queue); + blk_mq_unfreeze_queue(disk->queue, memflags); ret = -EINVAL; err: @@ -3414,6 +3415,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, { struct blkg_conf_ctx ctx; struct request_queue *q; + unsigned int memflags; struct ioc *ioc; u64 u[NR_I_LCOEFS]; bool user; @@ -3441,7 +3443,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, ioc = q_to_ioc(q); } - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); spin_lock_irq(&ioc->lock); @@ -3493,7 +3495,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); blkg_conf_exit(&ctx); return nbytes; @@ -3502,7 +3504,7 @@ einval: spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); ret = -EINVAL; err: diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index ebb522788d97..42c1e0b9a68f 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -749,9 +749,11 @@ static void blkiolatency_enable_work_fn(struct 
work_struct *work) */ enabled = atomic_read(&blkiolat->enable_cnt); if (enabled != blkiolat->enabled) { - blk_mq_freeze_queue(blkiolat->rqos.disk->queue); + unsigned int memflags; + + memflags = blk_mq_freeze_queue(blkiolat->rqos.disk->queue); blkiolat->enabled = enabled; - blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue); + blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue, memflags); } } diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index ad8d6a363f24..444798c5374f 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -87,7 +87,6 @@ void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, return; fallback: - WARN_ON_ONCE(qmap->nr_queues > 1); - blk_mq_clear_mq_map(qmap); + blk_mq_map_queues(qmap); } EXPORT_SYMBOL_GPL(blk_mq_map_hw_queues); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 156e9bb07abf..3feeeccf8a99 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -223,30 +223,27 @@ int blk_mq_sysfs_register(struct gendisk *disk) unsigned long i, j; int ret; - lockdep_assert_held(&q->sysfs_dir_lock); - ret = kobject_add(q->mq_kobj, &disk_to_dev(disk)->kobj, "mq"); if (ret < 0) - goto out; + return ret; kobject_uevent(q->mq_kobj, KOBJ_ADD); + mutex_lock(&q->tag_set->tag_list_lock); queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) - goto unreg; + goto out_unreg; } + mutex_unlock(&q->tag_set->tag_list_lock); + return 0; - q->mq_sysfs_init_done = true; - -out: - return ret; - -unreg: +out_unreg: queue_for_each_hw_ctx(q, hctx, j) { if (j < i) blk_mq_unregister_hctx(hctx); } + mutex_unlock(&q->tag_set->tag_list_lock); kobject_uevent(q->mq_kobj, KOBJ_REMOVE); kobject_del(q->mq_kobj); @@ -259,15 +256,13 @@ void blk_mq_sysfs_unregister(struct gendisk *disk) struct blk_mq_hw_ctx *hctx; unsigned long i; - lockdep_assert_held(&q->sysfs_dir_lock); - + mutex_lock(&q->tag_set->tag_list_lock); queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); + mutex_unlock(&q->tag_set->tag_list_lock); kobject_uevent(q->mq_kobj, KOBJ_REMOVE); kobject_del(q->mq_kobj); - - q->mq_sysfs_init_done = false; } void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) @@ -275,15 +270,11 @@ void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned long i; - mutex_lock(&q->sysfs_dir_lock); - if (!q->mq_sysfs_init_done) - goto unlock; + if (!blk_queue_registered(q)) + return; queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - -unlock: - mutex_unlock(&q->sysfs_dir_lock); } int blk_mq_sysfs_register_hctxs(struct request_queue *q) @@ -292,9 +283,8 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) unsigned long i; int ret = 0; - mutex_lock(&q->sysfs_dir_lock); - if (!q->mq_sysfs_init_done) - goto unlock; + if (!blk_queue_registered(q)) + goto out; queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); @@ -302,8 +292,6 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) break; } -unlock: - mutex_unlock(&q->sysfs_dir_lock); - +out: return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index da39a1cac702..40490ac88045 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -210,12 +210,12 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout); -void blk_mq_freeze_queue(struct request_queue *q) +void blk_mq_freeze_queue_nomemsave(struct request_queue *q) { blk_freeze_queue_start(q); blk_mq_freeze_queue_wait(q); } -EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); 
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_nomemsave); bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) { @@ -236,12 +236,12 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) return unfreeze; } -void blk_mq_unfreeze_queue(struct request_queue *q) +void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q) { if (__blk_mq_unfreeze_queue(q, false)) blk_unfreeze_release_lock(q); } -EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); +EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_nomemrestore); /* * non_owner variant of blk_freeze_queue_start @@ -4223,13 +4223,14 @@ static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set, bool shared) { struct request_queue *q; + unsigned int memflags; lockdep_assert_held(&set->tag_list_lock); list_for_each_entry(q, &set->tag_list, tag_set_list) { - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); queue_set_hctx_shared(q, shared); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } } @@ -4992,6 +4993,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, struct request_queue *q; LIST_HEAD(head); int prev_nr_hw_queues = set->nr_hw_queues; + unsigned int memflags; int i; lockdep_assert_held(&set->tag_list_lock); @@ -5003,8 +5005,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) return; + memflags = memalloc_noio_save(); list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_freeze_queue(q); + blk_mq_freeze_queue_nomemsave(q); + /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. We will switch back once we are done @@ -5052,7 +5056,8 @@ switch_back: blk_mq_elv_switch_back(&head, q); list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue_nomemrestore(q); + memalloc_noio_restore(memflags); /* Free the excess tags when nr_hw_queues shrink. */ for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++) diff --git a/block/blk-pm.c b/block/blk-pm.c index 42e842074715..8d3e052f91da 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -89,7 +89,7 @@ int blk_pre_runtime_suspend(struct request_queue *q) if (percpu_ref_is_zero(&q->q_usage_counter)) ret = 0; /* Switch q_usage_counter back to per-cpu mode. */ - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue_nomemrestore(q); if (ret < 0) { spin_lock_irq(&q->queue_lock); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index eb9618cd68ad..d4d4f4dc0e23 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -299,6 +299,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, const struct rq_qos_ops *ops) { struct request_queue *q = disk->queue; + unsigned int memflags; lockdep_assert_held(&q->rq_qos_mutex); @@ -310,14 +311,14 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, * No IO can be in-flight when adding rqos, so freeze queue, which * is fine since we only support rq_qos for blk-mq queue. 
*/ - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); if (rq_qos_id(q, rqos->id)) goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (rqos->ops->debugfs_attrs) { mutex_lock(&q->debugfs_mutex); @@ -327,7 +328,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, return 0; ebusy: - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); return -EBUSY; } @@ -335,17 +336,18 @@ void rq_qos_del(struct rq_qos *rqos) { struct request_queue *q = rqos->disk->queue; struct rq_qos **cur; + unsigned int memflags; lockdep_assert_held(&q->rq_qos_mutex); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; break; } } - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); diff --git a/block/blk-settings.c b/block/blk-settings.c index db12396ff5c7..c44dadc35e1e 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -461,11 +461,12 @@ EXPORT_SYMBOL_GPL(queue_limits_commit_update); int queue_limits_commit_update_frozen(struct request_queue *q, struct queue_limits *lim) { + unsigned int memflags; int ret; - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); ret = queue_limits_commit_update(q, lim); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); return ret; } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e09b455874bf..6f548a4376aa 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -681,7 +681,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, struct queue_sysfs_entry *entry = to_queue(attr); struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); struct request_queue *q = disk->queue; - unsigned int noio_flag; + unsigned int memflags; ssize_t res; if (!entry->store_limit && !entry->store) @@ -711,11 +711,9 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, } mutex_lock(&q->sysfs_lock); - blk_mq_freeze_queue(q); - noio_flag = memalloc_noio_save(); + memflags = blk_mq_freeze_queue(q); res = entry->store(disk, page, length); - memalloc_noio_restore(noio_flag); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); mutex_unlock(&q->sysfs_lock); return res; } @@ -764,7 +762,6 @@ int blk_register_queue(struct gendisk *disk) struct request_queue *q = disk->queue; int ret; - mutex_lock(&q->sysfs_dir_lock); kobject_init(&disk->queue_kobj, &blk_queue_ktype); ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue"); if (ret < 0) @@ -805,7 +802,6 @@ int blk_register_queue(struct gendisk *disk) if (q->elevator) kobject_uevent(&q->elevator->kobj, KOBJ_ADD); mutex_unlock(&q->sysfs_lock); - mutex_unlock(&q->sysfs_dir_lock); /* * SCSI probing may synchronously create and destroy a lot of @@ -830,7 +826,6 @@ out_debugfs_remove: mutex_unlock(&q->sysfs_lock); out_put_queue_kobj: kobject_put(&disk->queue_kobj); - mutex_unlock(&q->sysfs_dir_lock); return ret; } @@ -861,7 +856,6 @@ void blk_unregister_queue(struct gendisk *disk) blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); mutex_unlock(&q->sysfs_lock); - mutex_lock(&q->sysfs_dir_lock); /* * Remove the sysfs attributes before unregistering the queue data * structures that can be modified through sysfs. @@ -878,7 +872,6 @@ void blk_unregister_queue(struct gendisk *disk) /* Now that we've deleted all child objects, we can delete the queue. 
*/ kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE); kobject_del(&disk->queue_kobj); - mutex_unlock(&q->sysfs_dir_lock); blk_debugfs_remove(disk); } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 82dbaefcfa3b..8d149aff9fd0 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1202,6 +1202,7 @@ static int blk_throtl_init(struct gendisk *disk) { struct request_queue *q = disk->queue; struct throtl_data *td; + unsigned int memflags; int ret; td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); @@ -1215,7 +1216,7 @@ static int blk_throtl_init(struct gendisk *disk) * Freeze queue before activating policy, to synchronize with IO path, * which is protected by 'q_usage_counter'. */ - blk_mq_freeze_queue(disk->queue); + memflags = blk_mq_freeze_queue(disk->queue); blk_mq_quiesce_queue(disk->queue); q->td = td; @@ -1239,7 +1240,7 @@ static int blk_throtl_init(struct gendisk *disk) out: blk_mq_unquiesce_queue(disk->queue); - blk_mq_unfreeze_queue(disk->queue); + blk_mq_unfreeze_queue(disk->queue, memflags); return ret; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 9d08a54c201e..761ea662ddc3 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1717,9 +1717,10 @@ int blk_revalidate_disk_zones(struct gendisk *disk) else pr_warn("%s: failed to revalidate zones\n", disk->disk_name); if (ret) { - blk_mq_freeze_queue(q); + unsigned int memflags = blk_mq_freeze_queue(q); + disk_free_zone_resources(disk); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } return ret; diff --git a/block/elevator.c b/block/elevator.c index b81216c48b6b..cd2ce4921601 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -570,6 +570,7 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) void elevator_init_mq(struct request_queue *q) { struct elevator_type *e; + unsigned int memflags; int err; WARN_ON_ONCE(blk_queue_registered(q)); @@ -590,13 +591,13 @@ void elevator_init_mq(struct request_queue *q) * * Disk isn't added yet, so verifying queue lock only manually. 
*/ - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_cancel_work_sync(q); err = blk_mq_init_sched(q, e); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (err) { pr_warn("\"%s\" elevator initialization failed, " @@ -614,11 +615,12 @@ void elevator_init_mq(struct request_queue *q) */ int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { + unsigned int memflags; int ret; lockdep_assert_held(&q->sysfs_lock); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); if (q->elevator) { @@ -639,7 +641,7 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e) out_unfreeze: blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (ret) { pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n", @@ -651,9 +653,11 @@ out_unfreeze: void elevator_disable(struct request_queue *q) { + unsigned int memflags; + lockdep_assert_held(&q->sysfs_lock); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); elv_unregister_queue(q); @@ -664,7 +668,7 @@ void elevator_disable(struct request_queue *q) blk_add_trace_msg(q, "elv switch: none"); blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } /* diff --git a/block/fops.c b/block/fops.c index 6d5c4fc5a216..be9f1dbea9ce 100644 --- a/block/fops.c +++ b/block/fops.c @@ -783,11 +783,12 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) file_accessed(iocb->ki_filp); ret = blkdev_direct_IO(iocb, to); - if (ret >= 0) { + if (ret > 0) { iocb->ki_pos += ret; count -= ret; } - iov_iter_revert(to, count - iov_iter_count(to)); + if (ret != -EIOCBQUEUED) + iov_iter_revert(to, count - iov_iter_count(to)); if (ret < 0 || !count) goto reexpand; } diff --git a/crypto/fips.c b/crypto/fips.c index a58e7750f532..2fa3a9ee61a1 100644 --- a/crypto/fips.c +++ b/crypto/fips.c @@ -41,7 +41,7 @@ __setup("fips=", fips_enable); static char fips_name[] = FIPS_MODULE_NAME; static char fips_version[] = FIPS_MODULE_VERSION; -static struct ctl_table crypto_sysctl_table[] = { +static const struct ctl_table crypto_sysctl_table[] = { { .procname = "fips_enabled", .data = &fips_enabled, diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 80a3481c0470..bfbb08b1e6af 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -442,9 +442,9 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header, return -EINVAL; } - pr_info("Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n", - hmat_loc->flags, hmat_data_type(type), ipds, tpds, - hmat_loc->entry_base_unit); + pr_debug("Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n", + hmat_loc->flags, hmat_data_type(type), ipds, tpds, + hmat_loc->entry_base_unit); inits = (u32 *)(hmat_loc + 1); targs = inits + ipds; @@ -455,9 +455,9 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header, value = hmat_normalize(entries[init * tpds + targ], hmat_loc->entry_base_unit, type); - pr_info(" Initiator-Target[%u-%u]:%u%s\n", - inits[init], targs[targ], value, - hmat_data_type_suffix(type)); + pr_debug(" Initiator-Target[%u-%u]:%u%s\n", + inits[init], targs[targ], value, + hmat_data_type_suffix(type)); hmat_update_target(targs[targ], inits[init], mem_hier, type, value); @@ -485,9 +485,9 @@ static __init int hmat_parse_cache(union acpi_subtable_headers *header, } attrs = 
cache->cache_attributes; - pr_info("Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n", - cache->memory_PD, cache->cache_size, attrs, - cache->number_of_SMBIOShandles); + pr_debug("Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n", + cache->memory_PD, cache->cache_size, attrs, + cache->number_of_SMBIOShandles); target = find_mem_target(cache->memory_PD); if (!target) @@ -546,9 +546,9 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade } if (hmat_revision == 1) - pr_info("Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n", - p->reserved3, p->reserved4, p->flags, p->processor_PD, - p->memory_PD); + pr_debug("Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n", + p->reserved3, p->reserved4, p->flags, p->processor_PD, + p->memory_PD); else pr_info("Memory Flags:%04x Processor Domain:%u Memory Domain:%u\n", p->flags, p->processor_PD, p->memory_PD); diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index cb45ef5240da..068c1612660b 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -408,6 +408,19 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), }, { + /* Vexia Edu Atla 10 tablet 5V version */ + .matches = { + /* Having all 3 of these not set is somewhat unique */ + DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), + DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), + DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), + }, + { /* Vexia Edu Atla 10 tablet 9V version */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c085dd81ebe7..63ec2f218431 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4143,6 +4143,10 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "Samsung SSD 860*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, + { "Samsung SSD 870 QVO*", NULL, ATA_QUIRK_NO_NCQ_TRIM | + ATA_QUIRK_ZERO_AFTER_TRIM | + ATA_QUIRK_NO_NCQ_ON_ATI | + ATA_QUIRK_NOLPM }, { "Samsung SSD 870*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 67f277e1c3bf..5a46c066abc3 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -601,7 +601,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct page *page; - unsigned int offset; + unsigned int offset, count; if (!qc->cursg) { qc->curbytes = qc->nbytes; @@ -617,25 +617,27 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) page = nth_page(page, (offset >> PAGE_SHIFT)); offset %= PAGE_SIZE; - trace_ata_sff_pio_transfer_data(qc, offset, qc->sect_size); + /* don't overrun current sg */ + count = min(qc->cursg->length - qc->cursg_ofs, qc->sect_size); + + trace_ata_sff_pio_transfer_data(qc, offset, count); /* * Split the transfer when it splits a page boundary. Note that the * split still has to be dword aligned like all ATA data transfers. 
*/ WARN_ON_ONCE(offset % 4); - if (offset + qc->sect_size > PAGE_SIZE) { + if (offset + count > PAGE_SIZE) { unsigned int split_len = PAGE_SIZE - offset; ata_pio_xfer(qc, page, offset, split_len); - ata_pio_xfer(qc, nth_page(page, 1), 0, - qc->sect_size - split_len); + ata_pio_xfer(qc, nth_page(page, 1), 0, count - split_len); } else { - ata_pio_xfer(qc, page, offset, qc->sect_size); + ata_pio_xfer(qc, page, offset, count); } - qc->curbytes += qc->sect_size; - qc->cursg_ofs += qc->sect_size; + qc->curbytes += count; + qc->cursg_ofs += count; if (qc->cursg_ofs == qc->cursg->length) { qc->cursg = sg_next(qc->cursg); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 657c93c38b0d..6b9e65a42cd2 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -354,7 +354,7 @@ static struct device *next_device(struct klist_iter *i) * count in the supplied callback. */ int bus_for_each_dev(const struct bus_type *bus, struct device *start, - void *data, int (*fn)(struct device *, void *)) + void *data, device_iter_t fn) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; @@ -402,9 +402,12 @@ struct device *bus_find_device(const struct bus_type *bus, klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); - while ((dev = next_device(&i))) - if (match(dev, data) && get_device(dev)) + while ((dev = next_device(&i))) { + if (match(dev, data)) { + get_device(dev); break; + } + } klist_iter_exit(&i); subsys_put(sp); return dev; diff --git a/drivers/base/class.c b/drivers/base/class.c index 582b5a02a5c4..2526c57d924e 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -323,8 +323,12 @@ void class_dev_iter_init(struct class_dev_iter *iter, const struct class *class, struct subsys_private *sp = class_to_subsys(class); struct klist_node *start_knode = NULL; - if (!sp) + memset(iter, 0, sizeof(*iter)); + if (!sp) { + pr_crit("%s: class %p was not registered yet\n", + __func__, class); return; + } if (start) start_knode = &start->p->knode_class; @@ -351,6 +355,9 @@ struct device *class_dev_iter_next(struct class_dev_iter *iter) struct klist_node *knode; struct device *dev; + if (!iter->sp) + return NULL; + while (1) { knode = klist_next(&iter->ki); if (!knode) @@ -395,7 +402,7 @@ EXPORT_SYMBOL_GPL(class_dev_iter_exit); * code. There's no locking restriction. */ int class_for_each_device(const struct class *class, const struct device *start, - void *data, int (*fn)(struct device *, void *)) + void *data, device_iter_t fn) { struct subsys_private *sp = class_to_subsys(class); struct class_dev_iter iter; @@ -594,30 +601,10 @@ EXPORT_SYMBOL_GPL(class_compat_unregister); * a bus device * @cls: the compatibility class * @dev: the target bus device - * @device_link: an optional device to which a "device" link should be created */ -int class_compat_create_link(struct class_compat *cls, struct device *dev, - struct device *device_link) +int class_compat_create_link(struct class_compat *cls, struct device *dev) { - int error; - - error = sysfs_create_link(cls->kobj, &dev->kobj, dev_name(dev)); - if (error) - return error; - - /* - * Optionally add a "device" link (typically to the parent), as a - * class device would have one and we want to provide as much - * backwards compatibility as possible. 
- */ - if (device_link) { - error = sysfs_create_link(&dev->kobj, &device_link->kobj, - "device"); - if (error) - sysfs_remove_link(cls->kobj, dev_name(dev)); - } - - return error; + return sysfs_create_link(cls->kobj, &dev->kobj, dev_name(dev)); } EXPORT_SYMBOL_GPL(class_compat_create_link); @@ -626,14 +613,9 @@ EXPORT_SYMBOL_GPL(class_compat_create_link); * a bus device * @cls: the compatibility class * @dev: the target bus device - * @device_link: an optional device to which a "device" link was previously - * created */ -void class_compat_remove_link(struct class_compat *cls, struct device *dev, - struct device *device_link) +void class_compat_remove_link(struct class_compat *cls, struct device *dev) { - if (device_link) - sysfs_remove_link(&dev->kobj, "device"); sysfs_remove_link(cls->kobj, dev_name(dev)); } EXPORT_SYMBOL_GPL(class_compat_remove_link); diff --git a/drivers/base/core.c b/drivers/base/core.c index 94865c9d8adc..5a1f05198114 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3980,7 +3980,7 @@ const char *device_get_devnode(const struct device *dev, * other than 0, we break out and return that value. */ int device_for_each_child(struct device *parent, void *data, - int (*fn)(struct device *dev, void *data)) + device_iter_t fn) { struct klist_iter i; struct device *child; @@ -4010,7 +4010,7 @@ EXPORT_SYMBOL_GPL(device_for_each_child); * other than 0, we break out and return that value. */ int device_for_each_child_reverse(struct device *parent, void *data, - int (*fn)(struct device *dev, void *data)) + device_iter_t fn) { struct klist_iter i; struct device *child; @@ -4043,14 +4043,14 @@ EXPORT_SYMBOL_GPL(device_for_each_child_reverse); * device_for_each_child_reverse_from(); */ int device_for_each_child_reverse_from(struct device *parent, - struct device *from, const void *data, - int (*fn)(struct device *, const void *)) + struct device *from, void *data, + device_iter_t fn) { struct klist_iter i; struct device *child; int error = 0; - if (!parent->p) + if (!parent || !parent->p) return 0; klist_iter_init_node(&parent->p->klist_children, &i, @@ -4079,8 +4079,8 @@ EXPORT_SYMBOL_GPL(device_for_each_child_reverse_from); * * NOTE: you will need to drop the reference with put_device() after use. */ -struct device *device_find_child(struct device *parent, void *data, - int (*match)(struct device *dev, void *data)) +struct device *device_find_child(struct device *parent, const void *data, + device_match_t match) { struct klist_iter i; struct device *child; @@ -4089,62 +4089,17 @@ struct device *device_find_child(struct device *parent, void *data, return NULL; klist_iter_init(&parent->p->klist_children, &i); - while ((child = next_device(&i))) - if (match(child, data) && get_device(child)) + while ((child = next_device(&i))) { + if (match(child, data)) { + get_device(child); break; + } + } klist_iter_exit(&i); return child; } EXPORT_SYMBOL_GPL(device_find_child); -/** - * device_find_child_by_name - device iterator for locating a child device. - * @parent: parent struct device - * @name: name of the child device - * - * This is similar to the device_find_child() function above, but it - * returns a reference to a device that has the name @name. - * - * NOTE: you will need to drop the reference with put_device() after use. 
- */ -struct device *device_find_child_by_name(struct device *parent, - const char *name) -{ - struct klist_iter i; - struct device *child; - - if (!parent) - return NULL; - - klist_iter_init(&parent->p->klist_children, &i); - while ((child = next_device(&i))) - if (sysfs_streq(dev_name(child), name) && get_device(child)) - break; - klist_iter_exit(&i); - return child; -} -EXPORT_SYMBOL_GPL(device_find_child_by_name); - -static int match_any(struct device *dev, void *unused) -{ - return 1; -} - -/** - * device_find_any_child - device iterator for locating a child device, if any. - * @parent: parent struct device - * - * This is similar to the device_find_child() function above, but it - * returns a reference to a child device, if any. - * - * NOTE: you will need to drop the reference with put_device() after use. - */ -struct device *device_find_any_child(struct device *parent) -{ - return device_find_child(parent, NULL, match_any); -} -EXPORT_SYMBOL_GPL(device_find_any_child); - int __init devices_init(void) { devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL); @@ -5244,15 +5199,21 @@ int device_match_name(struct device *dev, const void *name) } EXPORT_SYMBOL_GPL(device_match_name); +int device_match_type(struct device *dev, const void *type) +{ + return dev->type == type; +} +EXPORT_SYMBOL_GPL(device_match_type); + int device_match_of_node(struct device *dev, const void *np) { - return dev->of_node == np; + return np && dev->of_node == np; } EXPORT_SYMBOL_GPL(device_match_of_node); int device_match_fwnode(struct device *dev, const void *fwnode) { - return dev_fwnode(dev) == fwnode; + return fwnode && dev_fwnode(dev) == fwnode; } EXPORT_SYMBOL_GPL(device_match_fwnode); @@ -5264,13 +5225,13 @@ EXPORT_SYMBOL_GPL(device_match_devt); int device_match_acpi_dev(struct device *dev, const void *adev) { - return ACPI_COMPANION(dev) == adev; + return adev && ACPI_COMPANION(dev) == adev; } EXPORT_SYMBOL(device_match_acpi_dev); int device_match_acpi_handle(struct device *dev, const void *handle) { - return ACPI_HANDLE(dev) == handle; + return handle && ACPI_HANDLE(dev) == handle; } EXPORT_SYMBOL(device_match_acpi_handle); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index fdaa24bb641a..a7e511849875 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -599,6 +599,7 @@ CPU_SHOW_VULN_FALLBACK(retbleed); CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); CPU_SHOW_VULN_FALLBACK(gds); CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); +CPU_SHOW_VULN_FALLBACK(ghostwrite); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -614,6 +615,7 @@ static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); +static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -630,6 +632,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spec_rstack_overflow.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_reg_file_data_sampling.attr, + &dev_attr_ghostwrite.attr, NULL }; diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index c795edad1b96..64840e5d5fcc 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -106,7 +106,7 @@ 
static void devcd_del(struct work_struct *wk) } static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -116,7 +116,7 @@ static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, } static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -132,19 +132,15 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj, return count; } -static struct bin_attribute devcd_attr_data = { - .attr = { .name = "data", .mode = S_IRUSR | S_IWUSR, }, - .size = 0, - .read = devcd_data_read, - .write = devcd_data_write, -}; +static const struct bin_attribute devcd_attr_data = + __BIN_ATTR(data, 0600, devcd_data_read, devcd_data_write, 0); -static struct bin_attribute *devcd_dev_bin_attrs[] = { +static const struct bin_attribute *const devcd_dev_bin_attrs[] = { &devcd_attr_data, NULL, }; static const struct attribute_group devcd_dev_group = { - .bin_attrs = devcd_dev_bin_attrs, + .bin_attrs_new = devcd_dev_bin_attrs, }; static const struct attribute_group *devcd_dev_groups[] = { @@ -186,9 +182,9 @@ static ssize_t disabled_show(const struct class *class, const struct class_attri * mutex_lock(&devcd->mutex); * * - * In the above diagram, It looks like disabled_store() would be racing with parallely + * In the above diagram, it looks like disabled_store() would be racing with parallelly * running devcd_del() and result in memory abort while acquiring devcd->mutex which - * is called after kfree of devcd memory after dropping its last reference with + * is called after kfree of devcd memory after dropping its last reference with * put_device(). However, this will not happens as fn(dev, data) runs * with its own reference to device via klist_node so it is not its last reference. * so, above situation would not occur. @@ -285,6 +281,8 @@ static void devcd_free_sgtable(void *data) * @offset: start copy from @offset@ bytes from the head of the data * in the given scatterlist * @data_len: the length of the data in the sg_table + * + * Returns: the number of bytes copied */ static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset, size_t buf_len, void *data, diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 2152eec0c135..93e7779ef21e 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -750,25 +750,38 @@ int __devm_add_action(struct device *dev, void (*action)(void *), void *data, co EXPORT_SYMBOL_GPL(__devm_add_action); /** - * devm_remove_action() - removes previously added custom action + * devm_remove_action_nowarn() - removes previously added custom action * @dev: Device that owns the action * @action: Function implementing the action * @data: Pointer to data passed to @action implementation * * Removes instance of @action previously added by devm_add_action(). * Both action and data should match one of the existing entries. + * + * In contrast to devm_remove_action(), this function does not WARN() if no + * entry could have been found. + * + * This should only be used if the action is contained in an object with + * independent lifetime management, e.g. the Devres rust abstraction. + * + * Causing the warning from regular driver code most likely indicates an abuse + * of the devres API. 
+ * + * Returns: 0 on success, -ENOENT if no entry could have been found. */ -void devm_remove_action(struct device *dev, void (*action)(void *), void *data) +int devm_remove_action_nowarn(struct device *dev, + void (*action)(void *), + void *data) { struct action_devres devres = { .data = data, .action = action, }; - WARN_ON(devres_destroy(dev, devm_action_release, devm_action_match, - &devres)); + return devres_destroy(dev, devm_action_release, devm_action_match, + &devres); } -EXPORT_SYMBOL_GPL(devm_remove_action); +EXPORT_SYMBOL_GPL(devm_remove_action_nowarn); /** * devm_release_action() - release previously added custom action diff --git a/drivers/base/driver.c b/drivers/base/driver.c index b4eb5b89c4ee..8ab010ddf709 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(driver_set_override); * Iterate over the @drv's list of devices calling @fn for each one. */ int driver_for_each_device(struct device_driver *drv, struct device *start, - void *data, int (*fn)(struct device *, void *)) + void *data, device_iter_t fn) { struct klist_iter i; struct device *dev; @@ -160,9 +160,12 @@ struct device *driver_find_device(const struct device_driver *drv, klist_iter_init_node(&drv->p->klist_devices, &i, (start ? &start->p->knode_driver : NULL)); - while ((dev = next_device(&i))) - if (match(dev, data) && get_device(dev)) + while ((dev = next_device(&i))) { + if (match(dev, data)) { + get_device(dev); break; + } + } klist_iter_exit(&i); return dev; } diff --git a/drivers/base/firmware_loader/fallback_table.c b/drivers/base/firmware_loader/fallback_table.c index ddb70e29eb42..c8afc501a8a4 100644 --- a/drivers/base/firmware_loader/fallback_table.c +++ b/drivers/base/firmware_loader/fallback_table.c @@ -25,7 +25,7 @@ struct firmware_fallback_config fw_fallback_config = { EXPORT_SYMBOL_NS_GPL(fw_fallback_config, "FIRMWARE_LOADER_PRIVATE"); #ifdef CONFIG_SYSCTL -static struct ctl_table firmware_config_table[] = { +static const struct ctl_table firmware_config_table[] = { { .procname = "force_sysfs_fallback", .data = &fw_fallback_config.force_sysfs_fallback, diff --git a/drivers/base/firmware_loader/sysfs.c b/drivers/base/firmware_loader/sysfs.c index c9c93b47d9a5..d254ceb56d84 100644 --- a/drivers/base/firmware_loader/sysfs.c +++ b/drivers/base/firmware_loader/sysfs.c @@ -259,7 +259,7 @@ static void firmware_rw(struct fw_priv *fw_priv, char *buffer, } static ssize_t firmware_data_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -316,7 +316,7 @@ static int fw_realloc_pages(struct fw_sysfs *fw_sysfs, int min_size) * the driver as a firmware image. 
**/ static ssize_t firmware_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -356,11 +356,11 @@ out: return retval; } -static struct bin_attribute firmware_attr_data = { +static const struct bin_attribute firmware_attr_data = { .attr = { .name = "data", .mode = 0644 }, .size = 0, - .read = firmware_data_read, - .write = firmware_data_write, + .read_new = firmware_data_read, + .write_new = firmware_data_write, }; static struct attribute *fw_dev_attrs[] = { @@ -374,14 +374,14 @@ static struct attribute *fw_dev_attrs[] = { NULL }; -static struct bin_attribute *fw_dev_bin_attrs[] = { +static const struct bin_attribute *const fw_dev_bin_attrs[] = { &firmware_attr_data, NULL }; static const struct attribute_group fw_dev_attr_group = { .attrs = fw_dev_attrs, - .bin_attrs = fw_dev_bin_attrs, + .bin_attrs_new = fw_dev_bin_attrs, #ifdef CONFIG_FW_UPLOAD .is_visible = fw_upload_is_visible, #endif diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index cbc9a7a75def..d497d448e4b2 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -656,13 +656,15 @@ static void device_resume_noirq(struct device *dev, pm_message_t state, bool asy * so change its status accordingly. * * Otherwise, the device is going to be resumed, so set its PM-runtime - * status to "active", but do that only if DPM_FLAG_SMART_SUSPEND is set - * to avoid confusing drivers that don't use it. + * status to "active" unless its power.set_active flag is clear, in + * which case it is not necessary to update its PM-runtime status. */ - if (skip_resume) + if (skip_resume) { pm_runtime_set_suspended(dev); - else if (dev_pm_skip_suspend(dev)) + } else if (dev->power.set_active) { pm_runtime_set_active(dev); + dev->power.set_active = false; + } if (dev->pm_domain) { info = "noirq power domain "; @@ -1189,18 +1191,24 @@ static pm_message_t resume_event(pm_message_t sleep_state) return PMSG_ON; } -static void dpm_superior_set_must_resume(struct device *dev) +static void dpm_superior_set_must_resume(struct device *dev, bool set_active) { struct device_link *link; int idx; - if (dev->parent) + if (dev->parent) { dev->parent->power.must_resume = true; + if (set_active) + dev->parent->power.set_active = true; + } idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) + list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { link->supplier->power.must_resume = true; + if (set_active) + link->supplier->power.set_active = true; + } device_links_read_unlock(idx); } @@ -1278,8 +1286,11 @@ Skip: dev->power.may_skip_resume)) dev->power.must_resume = true; - if (dev->power.must_resume) - dpm_superior_set_must_resume(dev); + if (dev->power.must_resume) { + dev->power.set_active = dev->power.set_active || + dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); + dpm_superior_set_must_resume(dev, dev->power.set_active); + } Complete: complete_all(&dev->power.completion); diff --git a/drivers/base/test/Kconfig b/drivers/base/test/Kconfig index 5c7fac80611c..2756870615cc 100644 --- a/drivers/base/test/Kconfig +++ b/drivers/base/test/Kconfig @@ -12,6 +12,7 @@ config TEST_ASYNC_DRIVER_PROBE config DM_KUNIT_TEST tristate "KUnit Tests for the device model" if !KUNIT_ALL_TESTS depends on KUNIT + default KUNIT_ALL_TESTS config DRIVER_PE_KUNIT_TEST tristate "KUnit Tests for property entry API" if 
!KUNIT_ALL_TESTS diff --git a/drivers/base/test/platform-device-test.c b/drivers/base/test/platform-device-test.c index ea05b8785743..6355a2231b74 100644 --- a/drivers/base/test/platform-device-test.c +++ b/drivers/base/test/platform-device-test.c @@ -1,8 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 +#include <kunit/platform_device.h> #include <kunit/resource.h> #include <linux/device.h> +#include <linux/device/bus.h> +#include <linux/of_platform.h> #include <linux/platform_device.h> #define DEVICE_NAME "test" @@ -217,7 +220,43 @@ static struct kunit_suite platform_device_devm_test_suite = { .test_cases = platform_device_devm_tests, }; -kunit_test_suite(platform_device_devm_test_suite); +static void platform_device_find_by_null_test(struct kunit *test) +{ + struct platform_device *pdev; + int ret; + + pdev = kunit_platform_device_alloc(test, DEVICE_NAME, PLATFORM_DEVID_NONE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + + ret = kunit_platform_device_add(test, pdev); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_EXPECT_PTR_EQ(test, of_find_device_by_node(NULL), NULL); + + KUNIT_EXPECT_PTR_EQ(test, bus_find_device_by_of_node(&platform_bus_type, NULL), NULL); + KUNIT_EXPECT_PTR_EQ(test, bus_find_device_by_fwnode(&platform_bus_type, NULL), NULL); + KUNIT_EXPECT_PTR_EQ(test, bus_find_device_by_acpi_dev(&platform_bus_type, NULL), NULL); + + KUNIT_EXPECT_FALSE(test, device_match_of_node(&pdev->dev, NULL)); + KUNIT_EXPECT_FALSE(test, device_match_fwnode(&pdev->dev, NULL)); + KUNIT_EXPECT_FALSE(test, device_match_acpi_dev(&pdev->dev, NULL)); + KUNIT_EXPECT_FALSE(test, device_match_acpi_handle(&pdev->dev, NULL)); +} + +static struct kunit_case platform_device_match_tests[] = { + KUNIT_CASE(platform_device_find_by_null_test), + {} +}; + +static struct kunit_suite platform_device_match_test_suite = { + .name = "platform-device-match", + .test_cases = platform_device_match_tests, +}; + +kunit_test_suites( + &platform_device_devm_test_suite, + &platform_device_match_test_suite, +); MODULE_DESCRIPTION("Test module for platform devices"); MODULE_AUTHOR("Maxime Ripard <mripard@kernel.org>"); diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 3523dd82d7a0..4db7f6ce8ade 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -226,10 +226,11 @@ aoedev_downdev(struct aoedev *d) /* fast fail all pending I/O */ if (d->blkq) { /* UP is cleared, freeze+quiesce to insure all are errored */ - blk_mq_freeze_queue(d->blkq); + unsigned int memflags = blk_mq_freeze_queue(d->blkq); + blk_mq_quiesce_queue(d->blkq); blk_mq_unquiesce_queue(d->blkq); - blk_mq_unfreeze_queue(d->blkq); + blk_mq_unfreeze_queue(d->blkq, memflags); } if (d->gd) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 110f9aca2667..a81ade622a01 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -746,6 +746,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) unsigned char *p; int sect, nsect; unsigned long flags; + unsigned int memflags; int ret; if (type) { @@ -758,7 +759,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) } q = unit[drive].disk[type]->queue; - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); local_irq_save(flags); @@ -817,7 +818,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) ret = FormatError ? 
-EIO : 0; out: blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); return ret; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1ec7417c7f00..c05fe27a96b6 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -586,6 +586,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, { struct file *file = fget(arg); struct file *old_file; + unsigned int memflags; int error; bool partscan; bool is_loop; @@ -623,14 +624,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, /* and ... switch */ disk_force_media_change(lo->lo_disk); - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); lo->lo_backing_file = file; lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping); mapping_set_gfp_mask(file->f_mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); loop_update_dio(lo); - blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; loop_global_unlock(lo, is_loop); @@ -1255,6 +1256,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) int err; bool partscan = false; bool size_changed = false; + unsigned int memflags; err = mutex_lock_killable(&lo->lo_mutex); if (err) @@ -1272,7 +1274,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) } /* I/O needs to be drained before changing lo_offset or lo_sizelimit */ - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); err = loop_set_status_from_info(lo, info); if (err) @@ -1281,8 +1283,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) partscan = !(lo->lo_flags & LO_FLAGS_PARTSCAN) && (info->lo_flags & LO_FLAGS_PARTSCAN); - lo->lo_flags &= ~(LOOP_SET_STATUS_SETTABLE_FLAGS | - LOOP_SET_STATUS_CLEARABLE_FLAGS); + lo->lo_flags &= ~LOOP_SET_STATUS_CLEARABLE_FLAGS; lo->lo_flags |= (info->lo_flags & LOOP_SET_STATUS_SETTABLE_FLAGS); if (size_changed) { @@ -1295,7 +1296,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) loop_update_dio(lo); out_unfreeze: - blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); out_unlock: @@ -1447,6 +1448,7 @@ static int loop_set_capacity(struct loop_device *lo) static int loop_set_dio(struct loop_device *lo, unsigned long arg) { bool use_dio = !!arg; + unsigned int memflags; if (lo->lo_state != Lo_bound) return -ENXIO; @@ -1460,18 +1462,19 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) vfs_fsync(lo->lo_backing_file, 0); } - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); if (use_dio) lo->lo_flags |= LO_FLAGS_DIRECT_IO; else lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; - blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); return 0; } static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { struct queue_limits lim; + unsigned int memflags; int err = 0; if (lo->lo_state != Lo_bound) @@ -1486,10 +1489,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) lim = queue_limits_start_update(lo->lo_queue); loop_update_limits(lo, &lim, arg); - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); err = queue_limits_commit_update(lo->lo_queue, &lim); loop_update_dio(lo); - 
blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); return err; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b63a0f29a54a..7bdc7eb808ea 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1234,6 +1234,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, struct socket *sock; struct nbd_sock **socks; struct nbd_sock *nsock; + unsigned int memflags; int err; /* Arg will be cast to int, check it to avoid overflow */ @@ -1247,7 +1248,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, * We need to make sure we don't get any errant requests while we're * reallocating the ->socks array. */ - blk_mq_freeze_queue(nbd->disk->queue); + memflags = blk_mq_freeze_queue(nbd->disk->queue); if (!netlink && !nbd->task_setup && !test_bit(NBD_RT_BOUND, &config->runtime_flags)) @@ -1288,12 +1289,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, INIT_WORK(&nsock->work, nbd_pending_cmd_work); socks[config->num_connections++] = nsock; atomic_inc(&config->live_connections); - blk_mq_unfreeze_queue(nbd->disk->queue); + blk_mq_unfreeze_queue(nbd->disk->queue, memflags); return 0; put_socket: - blk_mq_unfreeze_queue(nbd->disk->queue); + blk_mq_unfreeze_queue(nbd->disk->queue, memflags); sockfd_put(sock); return err; } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5b393e4a1ddf..faafd7ff43d6 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -7281,9 +7281,10 @@ static ssize_t do_rbd_remove(const char *buf, size_t count) * Prevent new IO from being queued and wait for existing * IO to complete/fail. */ - blk_mq_freeze_queue(rbd_dev->disk->queue); + unsigned int memflags = blk_mq_freeze_queue(rbd_dev->disk->queue); + blk_mark_disk_dead(rbd_dev->disk); - blk_mq_unfreeze_queue(rbd_dev->disk->queue); + blk_mq_unfreeze_queue(rbd_dev->disk->queue, memflags); } del_gendisk(rbd_dev->disk); diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 88dcae6ec575..33b3bc99d532 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -918,12 +918,12 @@ struct vdc_check_port_data { char *type; }; -static int vdc_device_probed(struct device *dev, void *arg) +static int vdc_device_probed(struct device *dev, const void *arg) { struct vio_dev *vdev = to_vio_dev(dev); - struct vdc_check_port_data *port_data; + const struct vdc_check_port_data *port_data; - port_data = (struct vdc_check_port_data *)arg; + port_data = (const struct vdc_check_port_data *)arg; if ((vdev->dev_no == port_data->dev_no) && (!(strcmp((char *)&vdev->type, port_data->type))) && @@ -1113,6 +1113,7 @@ static void vdc_requeue_inflight(struct vdc_port *port) static void vdc_queue_drain(struct vdc_port *port) { struct request_queue *q = port->disk->queue; + unsigned int memflags; /* * Mark the queue as draining, then freeze/quiesce to ensure * @@ -1121,12 +1122,12 @@ static void vdc_queue_drain(struct vdc_port *port) port->drain = 1; spin_unlock_irq(&port->vio.lock); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); spin_lock_irq(&port->vio.lock); port->drain = 0; blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 9914153b365b..3aedcb5add61 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -840,6 +840,7 @@ static int grab_drive(struct floppy_state *fs, enum swim_state state, static void release_drive(struct floppy_state *fs) { struct
request_queue *q = disks[fs->index]->queue; + unsigned int memflags; unsigned long flags; swim3_dbg("%s", "-> release drive\n"); @@ -848,10 +849,10 @@ static void release_drive(struct floppy_state *fs) fs->state = idle; spin_unlock_irqrestore(&swim3_lock, flags); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } static int fd_eject(struct floppy_state *fs) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index bfbe391c20fe..6a61ec35f426 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1583,11 +1583,12 @@ static int virtblk_freeze_priv(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; struct request_queue *q = vblk->disk->queue; + unsigned int memflags; /* Ensure no requests in virtqueues before deleting vqs. */ - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue_nowait(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); /* Ensure we don't receive any more interrupts */ virtio_reset_device(vdev); diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 1230045d78a5..aa5ec1d444a9 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -1381,13 +1381,12 @@ static void btnxpuart_tx_work(struct work_struct *work) while ((skb = nxp_dequeue(nxpdev))) { len = serdev_device_write_buf(serdev, skb->data, skb->len); - serdev_device_wait_until_sent(serdev, 0); hdev->stat.byte_tx += len; skb_pull(skb, len); if (skb->len > 0) { skb_queue_head(&nxpdev->txq, skb); - break; + continue; } switch (hci_skb_pkt_type(skb)) { diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 9aa018d4f6f5..90966dfbd278 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -899,11 +899,6 @@ static void btusb_reset(struct hci_dev *hdev) struct btusb_data *data; int err; - if (hdev->reset) { - hdev->reset(hdev); - return; - } - data = hci_get_drvdata(hdev); /* This is not an unbalanced PM reference since the device will reset */ err = usb_autopm_get_interface(data->intf); @@ -2639,8 +2634,15 @@ static void btusb_mtk_claim_iso_intf(struct btusb_data *data) struct btmtk_data *btmtk_data = hci_get_priv(data->hdev); int err; + /* + * The function usb_driver_claim_interface() is documented to need + * locks held if it's not called from a probe routine. The code here + * is called from the hci_power_on workqueue, so grab the lock. 
+ */ + device_lock(&btmtk_data->isopkt_intf->dev); err = usb_driver_claim_interface(&btusb_driver, btmtk_data->isopkt_intf, data); + device_unlock(&btmtk_data->isopkt_intf->dev); if (err < 0) { btmtk_data->isopkt_intf = NULL; bt_dev_err(data->hdev, "Failed to claim iso interface"); diff --git a/drivers/bus/fsl-mc/dprc-driver.c b/drivers/bus/fsl-mc/dprc-driver.c index 4b68c84ef485..52053f7c6d9a 100644 --- a/drivers/bus/fsl-mc/dprc-driver.c +++ b/drivers/bus/fsl-mc/dprc-driver.c @@ -22,8 +22,8 @@ struct fsl_mc_child_objs { struct fsl_mc_obj_desc *child_array; }; -static bool fsl_mc_device_match(struct fsl_mc_device *mc_dev, - struct fsl_mc_obj_desc *obj_desc) +static bool fsl_mc_device_match(const struct fsl_mc_device *mc_dev, + const struct fsl_mc_obj_desc *obj_desc) { return mc_dev->obj_desc.id == obj_desc->id && strcmp(mc_dev->obj_desc.type, obj_desc->type) == 0; @@ -112,9 +112,9 @@ void dprc_remove_devices(struct fsl_mc_device *mc_bus_dev, } EXPORT_SYMBOL_GPL(dprc_remove_devices); -static int __fsl_mc_device_match(struct device *dev, void *data) +static int __fsl_mc_device_match(struct device *dev, const void *data) { - struct fsl_mc_obj_desc *obj_desc = data; + const struct fsl_mc_obj_desc *obj_desc = data; struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); return fsl_mc_device_match(mc_dev, obj_desc); diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 2916d1333649..d1f3d327ddd1 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -320,90 +320,90 @@ const struct bus_type fsl_mc_bus_type = { }; EXPORT_SYMBOL_GPL(fsl_mc_bus_type); -struct device_type fsl_mc_bus_dprc_type = { +const struct device_type fsl_mc_bus_dprc_type = { .name = "fsl_mc_bus_dprc" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dprc_type); -struct device_type fsl_mc_bus_dpni_type = { +const struct device_type fsl_mc_bus_dpni_type = { .name = "fsl_mc_bus_dpni" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpni_type); -struct device_type fsl_mc_bus_dpio_type = { +const struct device_type fsl_mc_bus_dpio_type = { .name = "fsl_mc_bus_dpio" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpio_type); -struct device_type fsl_mc_bus_dpsw_type = { +const struct device_type fsl_mc_bus_dpsw_type = { .name = "fsl_mc_bus_dpsw" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpsw_type); -struct device_type fsl_mc_bus_dpbp_type = { +const struct device_type fsl_mc_bus_dpbp_type = { .name = "fsl_mc_bus_dpbp" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpbp_type); -struct device_type fsl_mc_bus_dpcon_type = { +const struct device_type fsl_mc_bus_dpcon_type = { .name = "fsl_mc_bus_dpcon" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpcon_type); -struct device_type fsl_mc_bus_dpmcp_type = { +const struct device_type fsl_mc_bus_dpmcp_type = { .name = "fsl_mc_bus_dpmcp" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpmcp_type); -struct device_type fsl_mc_bus_dpmac_type = { +const struct device_type fsl_mc_bus_dpmac_type = { .name = "fsl_mc_bus_dpmac" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpmac_type); -struct device_type fsl_mc_bus_dprtc_type = { +const struct device_type fsl_mc_bus_dprtc_type = { .name = "fsl_mc_bus_dprtc" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dprtc_type); -struct device_type fsl_mc_bus_dpseci_type = { +const struct device_type fsl_mc_bus_dpseci_type = { .name = "fsl_mc_bus_dpseci" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpseci_type); -struct device_type fsl_mc_bus_dpdmux_type = { +const struct device_type fsl_mc_bus_dpdmux_type = { .name = "fsl_mc_bus_dpdmux" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpdmux_type); -struct device_type fsl_mc_bus_dpdcei_type = { +const struct 
device_type fsl_mc_bus_dpdcei_type = { .name = "fsl_mc_bus_dpdcei" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpdcei_type); -struct device_type fsl_mc_bus_dpaiop_type = { +const struct device_type fsl_mc_bus_dpaiop_type = { .name = "fsl_mc_bus_dpaiop" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpaiop_type); -struct device_type fsl_mc_bus_dpci_type = { +const struct device_type fsl_mc_bus_dpci_type = { .name = "fsl_mc_bus_dpci" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpci_type); -struct device_type fsl_mc_bus_dpdmai_type = { +const struct device_type fsl_mc_bus_dpdmai_type = { .name = "fsl_mc_bus_dpdmai" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpdmai_type); -struct device_type fsl_mc_bus_dpdbg_type = { +const struct device_type fsl_mc_bus_dpdbg_type = { .name = "fsl_mc_bus_dpdbg" }; EXPORT_SYMBOL_GPL(fsl_mc_bus_dpdbg_type); -static struct device_type *fsl_mc_get_device_type(const char *type) +static const struct device_type *fsl_mc_get_device_type(const char *type) { static const struct { - struct device_type *dev_type; + const struct device_type *dev_type; const char *type; } dev_types[] = { { &fsl_mc_bus_dprc_type, "dprc" }, diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 51745ed1bbab..b163e043c687 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -3612,7 +3612,7 @@ static int cdrom_sysctl_handler(const struct ctl_table *ctl, int write, } /* Place files in /proc/sys/dev/cdrom */ -static struct ctl_table cdrom_table[] = { +static const struct ctl_table cdrom_table[] = { { .procname = "info", .data = &cdrom_sysctl_settings.info, diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 48fe96ab4649..e110857824fc 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -724,7 +724,7 @@ static int hpet_is_known(struct hpet_data *hdp) return 0; } -static struct ctl_table hpet_table[] = { +static const struct ctl_table hpet_table[] = { { .procname = "max-user-freq", .data = &hpet_max_freq, diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c index 05f17e3e6207..e63c316d8aaa 100644 --- a/drivers/char/ipmi/ipmi_poweroff.c +++ b/drivers/char/ipmi/ipmi_poweroff.c @@ -650,7 +650,7 @@ static struct ipmi_smi_watcher smi_watcher = { #ifdef CONFIG_PROC_FS #include <linux/sysctl.h> -static struct ctl_table ipmi_table[] = { +static const struct ctl_table ipmi_table[] = { { .procname = "poweroff_powercycle", .data = &poweroff_powercycle, .maxlen = sizeof(poweroff_powercycle), diff --git a/drivers/char/random.c b/drivers/char/random.c index 23ee76bbb4aa..2581186fa61b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1665,7 +1665,7 @@ static int proc_do_rointvec(const struct ctl_table *table, int write, void *buf, return write ? 
0 : proc_dointvec(table, 0, buf, lenp, ppos); } -static struct ctl_table random_table[] = { +static const struct ctl_table random_table[] = { { .procname = "poolsize", .data = &sysctl_poolsize, diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 704e84d00639..0ee5c691fb36 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -17,7 +17,7 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM config ARM_AIROHA_SOC_CPUFREQ tristate "Airoha EN7581 SoC CPUFreq support" - depends on ARCH_AIROHA || COMPILE_TEST + depends on (ARCH_AIROHA && OF) || COMPILE_TEST select PM_OPP default ARCH_AIROHA help diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 302df42d6887..463b69a2dff5 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -909,11 +909,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) { - set_boost(policy, acpi_cpufreq_driver.boost_enabled); - policy->boost_enabled = acpi_cpufreq_driver.boost_enabled; - } - return result; err_unreg: diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 2486a6c5256a..8f512448382f 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -611,7 +611,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * Section 8.4.7.1.1.5 of ACPI 6.1 spec) */ policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf); - policy->max = cppc_perf_to_khz(caps, caps->nominal_perf); + policy->max = cppc_perf_to_khz(caps, policy->boost_enabled ? + caps->highest_perf : caps->nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance @@ -619,7 +620,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * nonlinear perf */ policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf); - policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf); + policy->cpuinfo.max_freq = policy->max; policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); policy->shared_type = cpu_data->shared_type; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1076e37a18ad..e0048856ecee 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1410,10 +1410,6 @@ static int cpufreq_online(unsigned int cpu) goto out_free_policy; } - /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - if (cpufreq_boost_enabled() && policy_has_boost_freq(policy)) - policy->boost_enabled = true; - /* * The initialization has succeeded and the policy is online. 
* If there is a problem with its frequency table, take it @@ -1476,6 +1472,10 @@ static int cpufreq_online(unsigned int cpu) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); + } else { + ret = freq_qos_update_request(policy->max_freq_req, policy->max); + if (ret < 0) + goto out_destroy_policy; } if (cpufreq_driver->get && has_target()) { @@ -1570,6 +1570,18 @@ static int cpufreq_online(unsigned int cpu) if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); + /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ + if (policy->boost_enabled != cpufreq_boost_enabled()) { + policy->boost_enabled = cpufreq_boost_enabled(); + ret = cpufreq_driver->set_boost(policy, policy->boost_enabled); + if (ret) { + /* If the set_boost fails, the online operation is not affected */ + pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu, + policy->boost_enabled ? "enable" : "disable"); + policy->boost_enabled = !policy->boost_enabled; + } + } + pr_debug("initialization complete\n"); return 0; diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c index c6bdfc308e99..9cef71528076 100644 --- a/drivers/cpufreq/s3c64xx-cpufreq.c +++ b/drivers/cpufreq/s3c64xx-cpufreq.c @@ -24,6 +24,7 @@ struct s3c64xx_dvfs { unsigned int vddarm_max; }; +#ifdef CONFIG_REGULATOR static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = { [0] = { 1000000, 1150000 }, [1] = { 1050000, 1150000 }, @@ -31,6 +32,7 @@ static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = { [3] = { 1200000, 1350000 }, [4] = { 1300000, 1350000 }, }; +#endif static struct cpufreq_frequency_table s3c64xx_freq_table[] = { { 0, 0, 66000 }, @@ -51,15 +53,16 @@ static struct cpufreq_frequency_table s3c64xx_freq_table[] = { static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) { - struct s3c64xx_dvfs *dvfs; - unsigned int old_freq, new_freq; + unsigned int new_freq = s3c64xx_freq_table[index].frequency; int ret; +#ifdef CONFIG_REGULATOR + struct s3c64xx_dvfs *dvfs; + unsigned int old_freq; + old_freq = clk_get_rate(policy->clk) / 1000; - new_freq = s3c64xx_freq_table[index].frequency; dvfs = &s3c64xx_dvfs_table[s3c64xx_freq_table[index].driver_data]; -#ifdef CONFIG_REGULATOR if (vddarm && new_freq > old_freq) { ret = regulator_set_voltage(vddarm, dvfs->vddarm_min, diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 173ddcac540a..8fe5e1b47ef9 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -41,11 +41,7 @@ * idle state 2, the third bin spans from the target residency of idle state 2 * up to, but not including, the target residency of idle state 3 and so on. * The last bin spans from the target residency of the deepest idle state - * supplied by the driver to the scheduler tick period length or to infinity if - * the tick period length is less than the target residency of that state. In - * the latter case, the governor also counts events with the measured idle - * duration between the tick period length and the target residency of the - * deepest idle state. + * supplied by the driver to infinity. * * Two metrics called "hits" and "intercepts" are associated with each bin. * They are updated every time before selecting an idle state for the given CPU @@ -60,6 +56,10 @@ * into by the sleep length (these events are also referred to as "intercepts" * below). 
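The "hits" and "intercepts" metrics described in the teo.c comment above are exponentially decaying counters. A toy restatement of how a single counter evolves on each update, assuming the PULSE and DECAY_SHIFT constants defined later in teo.c (the values below match the file at the time of writing and are shown only for illustration):

#define PULSE		1024	/* illustrative; defined in teo.c */
#define DECAY_SHIFT	3	/* illustrative; defined in teo.c */

static unsigned int teo_metric_update(unsigned int metric, bool matched)
{
	metric -= metric >> DECAY_SHIFT;	/* gradual forgetting */
	if (matched)
		metric += PULSE;		/* reinforce the matching bin */
	return metric;
}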
* + * The governor also counts "intercepts" with the measured idle duration below + * the tick period length and uses this information when deciding whether or not + * to stop the scheduler tick. + * * In order to select an idle state for a CPU, the governor takes the following * steps (modulo the possible latency constraint that must be taken into account * too): @@ -106,6 +106,12 @@ #include "gov.h" /* + * Idle state exit latency threshold used for deciding whether or not to check + * the time till the closest expected timer event. + */ +#define LATENCY_THRESHOLD_NS (RESIDENCY_THRESHOLD_NS / 2) + +/* * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value * is used for decreasing metrics on a regular basis. */ @@ -124,18 +130,20 @@ struct teo_bin { /** * struct teo_cpu - CPU data used by the TEO cpuidle governor. - * @time_span_ns: Time between idle state selection and post-wakeup update. * @sleep_length_ns: Time till the closest timer event (at the selection time). * @state_bins: Idle state data bins for this CPU. * @total: Grand total of the "intercepts" and "hits" metrics for all bins. - * @tick_hits: Number of "hits" after TICK_NSEC. + * @tick_intercepts: "Intercepts" before TICK_NSEC. + * @short_idles: Wakeups after short idle periods. + * @artificial_wakeup: Set if the wakeup has been triggered by a safety net. */ struct teo_cpu { - s64 time_span_ns; s64 sleep_length_ns; struct teo_bin state_bins[CPUIDLE_STATE_MAX]; unsigned int total; - unsigned int tick_hits; + unsigned int tick_intercepts; + unsigned int short_idles; + bool artificial_wakeup; }; static DEFINE_PER_CPU(struct teo_cpu, teo_cpus); @@ -152,23 +160,17 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) s64 target_residency_ns; u64 measured_ns; - if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) { + cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT; + + if (cpu_data->artificial_wakeup) { /* - * One of the safety nets has triggered or the wakeup was close - * enough to the closest timer event expected at the idle state - * selection time to be discarded. + * If one of the safety nets has triggered, assume that this + * might have been a long sleep. */ measured_ns = U64_MAX; } else { u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns; - /* - * The computations below are to determine whether or not the - * (saved) time till the next timer event and the measured idle - * duration fall into the same "bin", so use last_residency_ns - * for that instead of time_span_ns which includes the cpuidle - * overhead. - */ measured_ns = dev->last_residency_ns; /* * The delay between the wakeup and the first instruction @@ -176,14 +178,16 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * time, so take 1/2 of the exit latency as a very rough * approximation of the average of it. 
*/ - if (measured_ns >= lat_ns) + if (measured_ns >= lat_ns) { measured_ns -= lat_ns / 2; - else + if (measured_ns < RESIDENCY_THRESHOLD_NS) + cpu_data->short_idles += PULSE; + } else { measured_ns /= 2; + cpu_data->short_idles += PULSE; + } } - cpu_data->total = 0; - /* * Decay the "hits" and "intercepts" metrics for all of the bins and * find the bins that the sleep length and the measured idle duration @@ -195,8 +199,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) bin->hits -= bin->hits >> DECAY_SHIFT; bin->intercepts -= bin->intercepts >> DECAY_SHIFT; - cpu_data->total += bin->hits + bin->intercepts; - target_residency_ns = drv->states[i].target_residency_ns; if (target_residency_ns <= cpu_data->sleep_length_ns) { @@ -206,38 +208,22 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) } } - /* - * If the deepest state's target residency is below the tick length, - * make a record of it to help teo_select() decide whether or not - * to stop the tick. This effectively adds an extra hits-only bin - * beyond the last state-related one. - */ - if (target_residency_ns < TICK_NSEC) { - cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT; - - cpu_data->total += cpu_data->tick_hits; - - if (TICK_NSEC <= cpu_data->sleep_length_ns) { - idx_timer = drv->state_count; - if (TICK_NSEC <= measured_ns) { - cpu_data->tick_hits += PULSE; - goto end; - } - } - } - + cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT; /* * If the measured idle duration falls into the same bin as the sleep * length, this is a "hit", so update the "hits" metric for that bin. * Otherwise, update the "intercepts" metric for the bin fallen into by * the measured idle duration. */ - if (idx_timer == idx_duration) + if (idx_timer == idx_duration) { cpu_data->state_bins[idx_timer].hits += PULSE; - else + } else { cpu_data->state_bins[idx_duration].intercepts += PULSE; + if (TICK_NSEC <= measured_ns) + cpu_data->tick_intercepts += PULSE; + } -end: + cpu_data->total -= cpu_data->total >> DECAY_SHIFT; cpu_data->total += PULSE; } @@ -285,14 +271,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); s64 latency_req = cpuidle_governor_latency_req(dev->cpu); ktime_t delta_tick = TICK_NSEC / 2; - unsigned int tick_intercept_sum = 0; unsigned int idx_intercept_sum = 0; unsigned int intercept_sum = 0; unsigned int idx_hit_sum = 0; unsigned int hit_sum = 0; int constraint_idx = 0; int idx0 = 0, idx = -1; - int prev_intercept_idx; s64 duration_ns; int i; @@ -301,10 +285,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, dev->last_state_idx = -1; } - cpu_data->time_span_ns = local_clock(); /* - * Set the expected sleep length to infinity in case of an early - * return. + * Set the sleep length to infinity in case the invocation of + * tick_nohz_get_sleep_length() below is skipped, in which case it won't + * be known whether or not the subsequent wakeup is caused by a timer. + * It is generally fine to count the wakeup as an intercept then, except + * for the cases when the CPU is mostly woken up by timers and there may + * be opportunities to ask for a deeper idle state when no imminent + * timers are scheduled which may be missed. 
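The wakeup classification added to teo_update() earlier in this hunk can be restated as a small predicate: the measured idle duration is discounted by half of the exit latency, and anything that still falls below RESIDENCY_THRESHOLD_NS, or that never exceeded the exit latency at all, is counted as a "short idle". The helper name is illustrative and the types and threshold come from the surrounding cpuidle governor code:

static bool teo_wakeup_was_short(u64 measured_ns, u64 exit_latency_ns)
{
	if (measured_ns < exit_latency_ns)
		return true;	/* woke up within the exit latency itself */

	/* discount half the exit latency as a rough average overhead */
	return measured_ns - exit_latency_ns / 2 < RESIDENCY_THRESHOLD_NS;
}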
*/ cpu_data->sleep_length_ns = KTIME_MAX; @@ -360,17 +348,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto end; } - tick_intercept_sum = intercept_sum + - cpu_data->state_bins[drv->state_count-1].intercepts; - /* * If the sum of the intercepts metric for all of the idle states * shallower than the current candidate one (idx) is greater than the * sum of the intercepts and hits metrics for the candidate state and - * all of the deeper states a shallower idle state is likely to be a + * all of the deeper states, a shallower idle state is likely to be a * better choice. */ - prev_intercept_idx = idx; if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) { int first_suitable_idx = idx; @@ -396,41 +380,38 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * first enabled state that is deep enough. */ if (teo_state_ok(i, drv) && - !dev->states_usage[i].disable) + !dev->states_usage[i].disable) { idx = i; - else - idx = first_suitable_idx; - + break; + } + idx = first_suitable_idx; break; } if (dev->states_usage[i].disable) continue; - if (!teo_state_ok(i, drv)) { + if (teo_state_ok(i, drv)) { /* - * The current state is too shallow, but if an - * alternative candidate state has been found, - * it may still turn out to be a better choice. + * The current state is deep enough, but still + * there may be a better one. */ - if (first_suitable_idx != idx) - continue; - - break; + first_suitable_idx = i; + continue; } - first_suitable_idx = i; + /* + * The current state is too shallow, so if no suitable + * states other than the initial candidate have been + * found, give up (the remaining states to check are + * shallower still), but otherwise the first suitable + * state other than the initial candidate may turn out + * to be preferable. + */ + if (first_suitable_idx == idx) + break; } } - if (!idx && prev_intercept_idx) { - /* - * We have to query the sleep length here otherwise we don't - * know after wakeup if our guess was correct. - */ - duration_ns = tick_nohz_get_sleep_length(&delta_tick); - cpu_data->sleep_length_ns = duration_ns; - goto out_tick; - } /* * If there is a latency constraint, it may be necessary to select an @@ -440,24 +421,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, idx = constraint_idx; /* - * Skip the timers check if state 0 is the current candidate one, - * because an immediate non-timer wakeup is expected in that case. + * If either the candidate state is state 0 or its target residency is + * low enough, there is basically nothing more to do, but if the sleep + * length is not updated, the subsequent wakeup will be counted as an + * "intercept" which may be problematic in the cases when timer wakeups + * are dominant. Namely, it may effectively prevent deeper idle states + * from being selected at one point even if no imminent timers are + * scheduled. + * + * However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one + * CPU are unlikely (user space has a default 50 us slack value for + * hrtimers and there are relatively few timers with a lower deadline + * value in the kernel), and even if they did happen, the potential + * benefit from using a deep idle state in that case would be + * questionable anyway for latency reasons. Thus if the measured idle + * duration falls into that range in the majority of cases, assume + * non-timer wakeups to be dominant and skip updating the sleep length + * to reduce latency. 
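Condensed into a predicate, the heuristic this comment describes (and which the condition a few lines further down implements) might look as follows; the helper and its parameter list are illustrative, the constants and the 50% short-idle test mirror the hunk itself:

static bool teo_skip_timer_query(int idx, s64 target_residency_ns,
				 unsigned int short_idles, unsigned int total,
				 s64 latency_req)
{
	/* only when the candidate state is state 0 or shallow enough */
	if (idx && target_residency_ns >= RESIDENCY_THRESHOLD_NS)
		return false;

	/* most wakeups were short idles, or latency is already very tight */
	return 2 * short_idles >= total || latency_req < LATENCY_THRESHOLD_NS;
}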
+ * + * Also, if the latency constraint is sufficiently low, it will force + * shallow idle states regardless of the wakeup type, so the sleep + * length need not be known in that case. */ - if (!idx) - goto out_tick; - - /* - * If state 0 is a polling one, check if the target residency of - * the current candidate state is low enough and skip the timers - * check in that case too. - */ - if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) && - drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) + if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) && + (2 * cpu_data->short_idles >= cpu_data->total || + latency_req < LATENCY_THRESHOLD_NS)) goto out_tick; duration_ns = tick_nohz_get_sleep_length(&delta_tick); cpu_data->sleep_length_ns = duration_ns; + if (!idx) + goto out_tick; + /* * If the closest expected timer is before the target residency of the * candidate state, a shallower one needs to be found. @@ -474,7 +470,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * total wakeup events, do not stop the tick. */ if (drv->states[idx].target_residency_ns < TICK_NSEC && - tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8) + cpu_data->tick_intercepts > cpu_data->total / 2 + cpu_data->total / 8) duration_ns = TICK_NSEC / 2; end: @@ -511,17 +507,16 @@ static void teo_reflect(struct cpuidle_device *dev, int state) struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); dev->last_state_idx = state; - /* - * If the wakeup was not "natural", but triggered by one of the safety - * nets, assume that the CPU might have been idle for the entire sleep - * length time. - */ if (dev->poll_time_limit || (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) { + /* + * The wakeup was not "genuine", but triggered by one of the + * safety nets. 
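The two "safety nets" referred to in the teo_reflect() comment above, condensed into one predicate: either the polling state hit its time limit, or the scheduler tick fired even though the expected sleep was longer than a tick period. The helper is only an illustration; the real code records the result in cpu_data->artificial_wakeup:

static bool teo_wakeup_was_artificial(struct cpuidle_device *dev,
				      s64 sleep_length_ns)
{
	return dev->poll_time_limit ||
	       (tick_nohz_idle_got_tick() && sleep_length_ns > TICK_NSEC);
}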
+ */ dev->poll_time_limit = false; - cpu_data->time_span_ns = cpu_data->sleep_length_ns; + cpu_data->artificial_wakeup = true; } else { - cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns; + cpu_data->artificial_wakeup = false; } } diff --git a/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c b/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c index c8241f5a0a26..f20ae7e35a0d 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c +++ b/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c @@ -473,22 +473,6 @@ unlock_and_exit: } DEFINE_SHOW_STORE_ATTRIBUTE(tl_control); -static int get_rp_index_from_file(const struct file *f, u8 *rp_id, u8 rp_num) -{ - char alpha; - u8 index; - int ret; - - ret = sscanf(f->f_path.dentry->d_name.name, ADF_TL_RP_REGS_FNAME, &alpha); - if (ret != 1) - return -EINVAL; - - index = ADF_TL_DBG_RP_INDEX_ALPHA(alpha); - *rp_id = index; - - return 0; -} - static int adf_tl_dbg_change_rp_index(struct adf_accel_dev *accel_dev, unsigned int new_rp_num, unsigned int rp_regs_index) @@ -611,18 +595,11 @@ static int tl_rp_data_show(struct seq_file *s, void *unused) { struct adf_accel_dev *accel_dev = s->private; u8 rp_regs_index; - u8 max_rp; - int ret; if (!accel_dev) return -EINVAL; - max_rp = GET_TL_DATA(accel_dev).max_rp; - ret = get_rp_index_from_file(s->file, &rp_regs_index, max_rp); - if (ret) { - dev_dbg(&GET_DEV(accel_dev), "invalid RP data file name\n"); - return ret; - } + rp_regs_index = debugfs_get_aux_num(s->file); return tl_print_rp_data(accel_dev, s, rp_regs_index); } @@ -635,7 +612,6 @@ static ssize_t tl_rp_data_write(struct file *file, const char __user *userbuf, struct adf_telemetry *telemetry; unsigned int new_rp_num; u8 rp_regs_index; - u8 max_rp; int ret; accel_dev = seq_f->private; @@ -643,15 +619,10 @@ static ssize_t tl_rp_data_write(struct file *file, const char __user *userbuf, return -EINVAL; telemetry = accel_dev->telemetry; - max_rp = GET_TL_DATA(accel_dev).max_rp; mutex_lock(&telemetry->wr_lock); - ret = get_rp_index_from_file(file, &rp_regs_index, max_rp); - if (ret) { - dev_dbg(&GET_DEV(accel_dev), "invalid RP data file name\n"); - goto unlock_and_exit; - } + rp_regs_index = debugfs_get_aux_num(file); ret = kstrtou32_from_user(userbuf, count, 10, &new_rp_num); if (ret) @@ -689,7 +660,8 @@ void adf_tl_dbgfs_add(struct adf_accel_dev *accel_dev) for (i = 0; i < max_rp; i++) { snprintf(name, sizeof(name), ADF_TL_RP_REGS_FNAME, ADF_TL_DBG_RP_ALPHA_INDEX(i)); - debugfs_create_file(name, 0644, dir, accel_dev, &tl_rp_data_fops); + debugfs_create_file_aux_num(name, 0644, dir, accel_dev, i, + &tl_rp_data_fops); } } diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 28edd5822486..50e6a45b30ba 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -703,7 +703,7 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) return 0; } -static int commit_reap(struct device *dev, const void *data) +static int commit_reap(struct device *dev, void *data) { struct cxl_port *port = to_cxl_port(dev->parent); struct cxl_decoder *cxld; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 9d58ab9d33c5..013b869b66cb 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -252,9 +252,9 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) } /* require dvsec ranges to be covered by a locked platform window */ -static int dvsec_range_allowed(struct device *dev, void *arg) +static int dvsec_range_allowed(struct device *dev, const void *arg) { - struct range 
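The QAT telemetry hunk above replaces parsing the ring-pair index back out of the debugfs file name with storing the index alongside the file at creation time. A minimal sketch of that pattern, assuming only the two debugfs helpers used in the hunk (debugfs_create_file_aux_num() and debugfs_get_aux_num()); the show routine, file names, and surrounding driver are hypothetical:

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int my_rp_show(struct seq_file *s, void *unused)
{
	u8 rp = debugfs_get_aux_num(s->file);	/* index stored at creation */

	seq_printf(s, "ring pair %u\n", rp);
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(my_rp_show);

static void my_add_rp_files(struct dentry *dir, void *priv, unsigned int count)
{
	char name[8];
	unsigned int i;

	for (i = 0; i < count; i++) {
		snprintf(name, sizeof(name), "rp_%c", 'A' + i);
		debugfs_create_file_aux_num(name, 0444, dir, priv, i,
					    &my_rp_show_fops);
	}
}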
*dev_range = arg; + const struct range *dev_range = arg; struct cxl_decoder *cxld; if (!is_root_decoder(dev)) @@ -291,11 +291,11 @@ static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm) return devm_add_action_or_reset(host, disable_hdm, cxlhdm); } -int cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, +int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info) { - struct pci_dev *pdev = to_pci_dev(dev); - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct device *dev = cxlds->dev; int hdm_count, rc, i, ranges = 0; int d = cxlds->cxl_dvsec; u16 cap, ctrl; diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index b3378d3f6acb..8853415c106a 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -51,17 +51,6 @@ struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev) } EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm_bridge, "CXL"); -bool is_cxl_nvdimm_bridge(struct device *dev) -{ - return dev->type == &cxl_nvdimm_bridge_type; -} -EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm_bridge, "CXL"); - -static int match_nvdimm_bridge(struct device *dev, void *data) -{ - return is_cxl_nvdimm_bridge(dev); -} - /** * cxl_find_nvdimm_bridge() - find a bridge device relative to a port * @port: any descendant port of an nvdimm-bridge associated @@ -75,7 +64,9 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port) if (!cxl_root) return NULL; - dev = device_find_child(&cxl_root->port.dev, NULL, match_nvdimm_bridge); + dev = device_find_child(&cxl_root->port.dev, + &cxl_nvdimm_bridge_type, + device_match_type); if (!dev) return NULL; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index b98b1ccffd1c..e8d11a988fd9 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -778,7 +778,7 @@ out: return rc; } -static int check_commit_order(struct device *dev, const void *data) +static int check_commit_order(struct device *dev, void *data) { struct cxl_decoder *cxld = to_cxl_decoder(dev); @@ -792,7 +792,7 @@ static int check_commit_order(struct device *dev, const void *data) return 0; } -static int match_free_decoder(struct device *dev, void *data) +static int match_free_decoder(struct device *dev, const void *data) { struct cxl_port *port = to_cxl_port(dev->parent); struct cxl_decoder *cxld; @@ -824,9 +824,9 @@ static int match_free_decoder(struct device *dev, void *data) return 1; } -static int match_auto_decoder(struct device *dev, void *data) +static int match_auto_decoder(struct device *dev, const void *data) { - struct cxl_region_params *p = data; + const struct cxl_region_params *p = data; struct cxl_decoder *cxld; struct range *r; @@ -1733,10 +1733,12 @@ static struct cxl_port *next_port(struct cxl_port *port) return port->parent_dport->port; } -static int match_switch_decoder_by_range(struct device *dev, void *data) +static int match_switch_decoder_by_range(struct device *dev, + const void *data) { struct cxl_switch_decoder *cxlsd; - struct range *r1, *r2 = data; + const struct range *r1, *r2 = data; + if (!is_switch_decoder(dev)) return 0; @@ -3187,9 +3189,10 @@ err: return rc; } -static int match_root_decoder_by_range(struct device *dev, void *data) +static int match_root_decoder_by_range(struct device *dev, + const void *data) { - struct range *r1, *r2 = data; + const struct range *r1, *r2 = data; struct cxl_root_decoder *cxlrd; if (!is_root_decoder(dev)) @@ -3200,11 +3203,11 @@ static int 
match_root_decoder_by_range(struct device *dev, void *data) return range_contains(r1, r2); } -static int match_region_by_range(struct device *dev, void *data) +static int match_region_by_range(struct device *dev, const void *data) { struct cxl_region_params *p; struct cxl_region *cxlr; - struct range *r = data; + const struct range *r = data; int rc = 0; if (!is_cxl_region(dev)) diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 59cb35b40c7e..117c2e94c761 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -289,21 +289,17 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, return true; } -/** - * cxl_find_regblock_instance() - Locate a register block by type / index - * @pdev: The CXL PCI device to enumerate. - * @type: Register Block Indicator id - * @map: Enumeration output, clobbered on error - * @index: Index into which particular instance of a regblock wanted in the - * order found in register locator DVSEC. - * - * Return: 0 if register block enumerated, negative error code otherwise +/* + * __cxl_find_regblock_instance() - Locate a register block or count instances by type / index + * Use CXL_INSTANCES_COUNT for @index if counting instances. * - * A CXL DVSEC may point to one or more register blocks, search for them - * by @type and @index. + * __cxl_find_regblock_instance() may return: + * 0 - if register block enumerated. + * >= 0 - if counting instances. + * < 0 - error code otherwise. */ -int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map, int index) +static int __cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map, int index) { u32 regloc_size, regblocks; int instance = 0; @@ -342,8 +338,30 @@ int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, } map->resource = CXL_RESOURCE_NONE; + if (index == CXL_INSTANCES_COUNT) + return instance; + return -ENODEV; } + +/** + * cxl_find_regblock_instance() - Locate a register block by type / index + * @pdev: The CXL PCI device to enumerate. + * @type: Register Block Indicator id + * @map: Enumeration output, clobbered on error + * @index: Index into which particular instance of a regblock wanted in the + * order found in register locator DVSEC. + * + * Return: 0 if register block enumerated, negative error code otherwise + * + * A CXL DVSEC may point to one or more register blocks, search for them + * by @type and @index. + */ +int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map, unsigned int index) +{ + return __cxl_find_regblock_instance(pdev, type, map, index); +} EXPORT_SYMBOL_NS_GPL(cxl_find_regblock_instance, "CXL"); /** @@ -360,7 +378,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_find_regblock_instance, "CXL"); int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map) { - return cxl_find_regblock_instance(pdev, type, map, 0); + return __cxl_find_regblock_instance(pdev, type, map, 0); } EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, "CXL"); @@ -371,19 +389,13 @@ EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, "CXL"); * * Some regblocks may be repeated. Count how many instances. * - * Return: count of matching regblocks. + * Return: non-negative count of matching regblocks, negative error code otherwise. 
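Caller-side consequence of the new cxl_count_regblock() return convention, mirroring the cxl_pci_probe() hunk further below: a negative value is now an error and must not be fed into the loop bound. The surrounding function is hypothetical and assumes the CXL driver's internal headers for cxl_count_regblock() and CXL_REGLOC_RBI_PMU:

static int my_enumerate_pmus(struct pci_dev *pdev)
{
	int pmu_count;
	unsigned int i;

	pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
	if (pmu_count < 0)
		return pmu_count;	/* propagate the error, not a count */

	for (i = 0; i < pmu_count; i++) {
		/* map and register each PMU register block here */
	}

	return 0;
}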
*/ int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type) { struct cxl_register_map map; - int rc, count = 0; - while (1) { - rc = cxl_find_regblock_instance(pdev, type, &map, count); - if (rc) - return count; - count++; - } + return __cxl_find_regblock_instance(pdev, type, &map, CXL_INSTANCES_COUNT); } EXPORT_SYMBOL_NS_GPL(cxl_count_regblock, "CXL"); diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 8389a94adb1a..cea706b683b5 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -166,11 +166,13 @@ TRACE_EVENT(cxl_overflow, #define CXL_EVENT_RECORD_FLAG_MAINT_NEEDED BIT(3) #define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4) #define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5) +#define CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID BIT(6) #define show_hdr_flags(flags) __print_flags(flags, " | ", \ { CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \ { CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \ { CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \ - { CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" } \ + { CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" }, \ + { CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID, "MAINT_OP_SUB_CLASS_VALID" } \ ) /* @@ -197,7 +199,8 @@ TRACE_EVENT(cxl_overflow, __field(u16, hdr_related_handle) \ __field(u64, hdr_timestamp) \ __field(u8, hdr_length) \ - __field(u8, hdr_maint_op_class) + __field(u8, hdr_maint_op_class) \ + __field(u8, hdr_maint_op_sub_class) #define CXL_EVT_TP_fast_assign(cxlmd, l, hdr) \ __assign_str(memdev); \ @@ -209,17 +212,19 @@ TRACE_EVENT(cxl_overflow, __entry->hdr_handle = le16_to_cpu((hdr).handle); \ __entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \ __entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \ - __entry->hdr_maint_op_class = (hdr).maint_op_class + __entry->hdr_maint_op_class = (hdr).maint_op_class; \ + __entry->hdr_maint_op_sub_class = (hdr).maint_op_sub_class #define CXL_EVT_TP_printk(fmt, ...) \ TP_printk("memdev=%s host=%s serial=%lld log=%s : time=%llu uuid=%pUb " \ "len=%d flags='%s' handle=%x related_handle=%x " \ - "maint_op_class=%u : " fmt, \ + "maint_op_class=%u maint_op_sub_class=%u : " fmt, \ __get_str(memdev), __get_str(host), __entry->serial, \ cxl_event_log_type_str(__entry->log), \ __entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\ show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \ __entry->hdr_related_handle, __entry->hdr_maint_op_class, \ + __entry->hdr_maint_op_sub_class, \ ##__VA_ARGS__) TRACE_EVENT(cxl_generic_event, @@ -264,8 +269,30 @@ TRACE_EVENT(cxl_generic_event, ) /* + * Component ID Format + * CXL 3.1 section 8.2.9.2.1; Table 8-44 + */ +#define CXL_PLDM_COMPONENT_ID_ENTITY_VALID BIT(0) +#define CXL_PLDM_COMPONENT_ID_RES_VALID BIT(1) + +#define show_comp_id_pldm_flags(flags) __print_flags(flags, " | ", \ + { CXL_PLDM_COMPONENT_ID_ENTITY_VALID, "PLDM Entity ID" }, \ + { CXL_PLDM_COMPONENT_ID_RES_VALID, "Resource ID" } \ +) + +#define show_pldm_entity_id(flags, valid_comp_id, valid_id_format, comp_id) \ + (flags & valid_comp_id && flags & valid_id_format) ? \ + (comp_id[0] & CXL_PLDM_COMPONENT_ID_ENTITY_VALID) ? \ + __print_hex(&comp_id[1], 6) : "0x00" : "0x00" + +#define show_pldm_resource_id(flags, valid_comp_id, valid_id_format, comp_id) \ + (flags & valid_comp_id && flags & valid_id_format) ? \ + (comp_id[0] & CXL_PLDM_COMPONENT_ID_RES_VALID) ? 
\ + __print_hex(&comp_id[7], 4) : "0x00" : "0x00" + +/* * General Media Event Record - GMER - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + * CXL rev 3.1 Section 8.2.9.2.1.1; Table 8-45 */ #define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT BIT(0) #define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1) @@ -279,10 +306,18 @@ TRACE_EVENT(cxl_generic_event, #define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR 0x00 #define CXL_GMER_MEM_EVT_TYPE_INV_ADDR 0x01 #define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x02 -#define show_gmer_mem_event_type(type) __print_symbolic(type, \ - { CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ - { CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ - { CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \ +#define CXL_GMER_MEM_EVT_TYPE_TE_STATE_VIOLATION 0x03 +#define CXL_GMER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR 0x04 +#define CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE 0x05 +#define CXL_GMER_MEM_EVT_TYPE_CKID_VIOLATION 0x06 +#define show_gmer_mem_event_type(type) __print_symbolic(type, \ + { CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ + { CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ + { CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" }, \ + { CXL_GMER_MEM_EVT_TYPE_TE_STATE_VIOLATION, "TE State Violation" }, \ + { CXL_GMER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR, "Scrub Media ECC Error" }, \ + { CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE, "Adv Prog CME Counter Expiration" }, \ + { CXL_GMER_MEM_EVT_TYPE_CKID_VIOLATION, "CKID Violation" } \ ) #define CXL_GMER_TRANS_UNKNOWN 0x00 @@ -292,6 +327,8 @@ TRACE_EVENT(cxl_generic_event, #define CXL_GMER_TRANS_HOST_INJECT_POISON 0x04 #define CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB 0x05 #define CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT 0x06 +#define CXL_GMER_TRANS_INTERNAL_MEDIA_ECS 0x07 +#define CXL_GMER_TRANS_MEDIA_INITIALIZATION 0x08 #define show_trans_type(type) __print_symbolic(type, \ { CXL_GMER_TRANS_UNKNOWN, "Unknown" }, \ { CXL_GMER_TRANS_HOST_READ, "Host Read" }, \ @@ -299,18 +336,57 @@ TRACE_EVENT(cxl_generic_event, { CXL_GMER_TRANS_HOST_SCAN_MEDIA, "Host Scan Media" }, \ { CXL_GMER_TRANS_HOST_INJECT_POISON, "Host Inject Poison" }, \ { CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, "Internal Media Scrub" }, \ - { CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" } \ + { CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" }, \ + { CXL_GMER_TRANS_INTERNAL_MEDIA_ECS, "Internal Media Error Check Scrub" }, \ + { CXL_GMER_TRANS_MEDIA_INITIALIZATION, "Media Initialization" } \ ) #define CXL_GMER_VALID_CHANNEL BIT(0) #define CXL_GMER_VALID_RANK BIT(1) #define CXL_GMER_VALID_DEVICE BIT(2) #define CXL_GMER_VALID_COMPONENT BIT(3) +#define CXL_GMER_VALID_COMPONENT_ID_FORMAT BIT(4) #define show_valid_flags(flags) __print_flags(flags, "|", \ { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \ { CXL_GMER_VALID_RANK, "RANK" }, \ { CXL_GMER_VALID_DEVICE, "DEVICE" }, \ - { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \ + { CXL_GMER_VALID_COMPONENT, "COMPONENT" }, \ + { CXL_GMER_VALID_COMPONENT_ID_FORMAT, "COMPONENT PLDM FORMAT" } \ +) + +#define CXL_GMER_CME_EV_FLAG_CME_MULTIPLE_MEDIA BIT(0) +#define CXL_GMER_CME_EV_FLAG_THRESHOLD_EXCEEDED BIT(1) +#define show_cme_threshold_ev_flags(flags) __print_flags(flags, "|", \ + { \ + CXL_GMER_CME_EV_FLAG_CME_MULTIPLE_MEDIA, \ + "Corrected Memory Errors in Multiple Media Components" \ + }, { \ + CXL_GMER_CME_EV_FLAG_THRESHOLD_EXCEEDED, \ + "Exceeded Programmable Threshold" \ + } \ +) + +#define CXL_GMER_MEM_EVT_SUB_TYPE_NOT_REPORTED 0x00 +#define 
CXL_GMER_MEM_EVT_SUB_TYPE_INTERNAL_DATAPATH_ERROR 0x01 +#define CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_COMMAND_TRAINING_ERROR 0x02 +#define CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_CONTROL_TRAINING_ERROR 0x03 +#define CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_DATA_TRAINING_ERROR 0x04 +#define CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_CRC_ERROR 0x05 +#define show_mem_event_sub_type(sub_type) __print_symbolic(sub_type, \ + { CXL_GMER_MEM_EVT_SUB_TYPE_NOT_REPORTED, "Not Reported" }, \ + { CXL_GMER_MEM_EVT_SUB_TYPE_INTERNAL_DATAPATH_ERROR, "Internal Datapath Error" }, \ + { \ + CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_COMMAND_TRAINING_ERROR, \ + "Media Link Command Training Error" \ + }, { \ + CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_CONTROL_TRAINING_ERROR, \ + "Media Link Control Training Error" \ + }, { \ + CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_DATA_TRAINING_ERROR, \ + "Media Link Data Training Error" \ + }, { \ + CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_CRC_ERROR, "Media Link CRC Error" \ + } \ ) TRACE_EVENT(cxl_general_media, @@ -336,6 +412,9 @@ TRACE_EVENT(cxl_general_media, __field(u16, validity_flags) __field(u8, rank) __field(u8, dpa_flags) + __field(u32, cme_count) + __field(u8, sub_type) + __field(u8, cme_threshold_ev_flags) __string(region_name, cxlr ? dev_name(&cxlr->dev) : "") ), @@ -350,6 +429,7 @@ TRACE_EVENT(cxl_general_media, __entry->dpa &= CXL_DPA_MASK; __entry->descriptor = rec->media_hdr.descriptor; __entry->type = rec->media_hdr.type; + __entry->sub_type = rec->sub_type; __entry->transaction_type = rec->media_hdr.transaction_type; __entry->channel = rec->media_hdr.channel; __entry->rank = rec->media_hdr.rank; @@ -365,27 +445,40 @@ TRACE_EVENT(cxl_general_media, __assign_str(region_name); uuid_copy(&__entry->region_uuid, &uuid_null); } + __entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags; + __entry->cme_count = get_unaligned_le24(rec->cme_count); ), CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \ - "descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u " \ - "device=%x comp_id=%s validity_flags='%s' " \ - "hpa=%llx region=%s region_uuid=%pUb", + "descriptor='%s' type='%s' sub_type='%s' " \ + "transaction_type='%s' channel=%u rank=%u " \ + "device=%x validity_flags='%s' " \ + "comp_id=%s comp_id_pldm_valid_flags='%s' " \ + "pldm_entity_id=%s pldm_resource_id=%s " \ + "hpa=%llx region=%s region_uuid=%pUb " \ + "cme_threshold_ev_flags='%s' cme_count=%u", __entry->dpa, show_dpa_flags(__entry->dpa_flags), show_event_desc_flags(__entry->descriptor), show_gmer_mem_event_type(__entry->type), + show_mem_event_sub_type(__entry->sub_type), show_trans_type(__entry->transaction_type), __entry->channel, __entry->rank, __entry->device, - __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE), show_valid_flags(__entry->validity_flags), - __entry->hpa, __get_str(region_name), &__entry->region_uuid + __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE), + show_comp_id_pldm_flags(__entry->comp_id[0]), + show_pldm_entity_id(__entry->validity_flags, CXL_GMER_VALID_COMPONENT, + CXL_GMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + show_pldm_resource_id(__entry->validity_flags, CXL_GMER_VALID_COMPONENT, + CXL_GMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + __entry->hpa, __get_str(region_name), &__entry->region_uuid, + show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags), __entry->cme_count ) ); /* * DRAM Event Record - DER * - * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + * CXL rev 3.1 section 8.2.9.2.1.2; Table 8-46 */ /* * DRAM Event Record defines many fields the 
same as the General Media Event @@ -395,11 +488,17 @@ TRACE_EVENT(cxl_general_media, #define CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR 0x01 #define CXL_DER_MEM_EVT_TYPE_INV_ADDR 0x02 #define CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x03 -#define show_dram_mem_event_type(type) __print_symbolic(type, \ - { CXL_DER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ - { CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR, "Scrub Media ECC Error" }, \ - { CXL_DER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ - { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \ +#define CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION 0x04 +#define CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE 0x05 +#define CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION 0x06 +#define show_dram_mem_event_type(type) __print_symbolic(type, \ + { CXL_DER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ + { CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR, "Scrub Media ECC Error" }, \ + { CXL_DER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ + { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" }, \ + { CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION, "TE State Violation" }, \ + { CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE, "Adv Prog CME Counter Expiration" }, \ + { CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION, "CKID Violation" } \ ) #define CXL_DER_VALID_CHANNEL BIT(0) @@ -410,15 +509,21 @@ TRACE_EVENT(cxl_general_media, #define CXL_DER_VALID_ROW BIT(5) #define CXL_DER_VALID_COLUMN BIT(6) #define CXL_DER_VALID_CORRECTION_MASK BIT(7) -#define show_dram_valid_flags(flags) __print_flags(flags, "|", \ - { CXL_DER_VALID_CHANNEL, "CHANNEL" }, \ - { CXL_DER_VALID_RANK, "RANK" }, \ - { CXL_DER_VALID_NIBBLE, "NIBBLE" }, \ - { CXL_DER_VALID_BANK_GROUP, "BANK GROUP" }, \ - { CXL_DER_VALID_BANK, "BANK" }, \ - { CXL_DER_VALID_ROW, "ROW" }, \ - { CXL_DER_VALID_COLUMN, "COLUMN" }, \ - { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" } \ +#define CXL_DER_VALID_COMPONENT BIT(8) +#define CXL_DER_VALID_COMPONENT_ID_FORMAT BIT(9) +#define CXL_DER_VALID_SUB_CHANNEL BIT(10) +#define show_dram_valid_flags(flags) __print_flags(flags, "|", \ + { CXL_DER_VALID_CHANNEL, "CHANNEL" }, \ + { CXL_DER_VALID_RANK, "RANK" }, \ + { CXL_DER_VALID_NIBBLE, "NIBBLE" }, \ + { CXL_DER_VALID_BANK_GROUP, "BANK GROUP" }, \ + { CXL_DER_VALID_BANK, "BANK" }, \ + { CXL_DER_VALID_ROW, "ROW" }, \ + { CXL_DER_VALID_COLUMN, "COLUMN" }, \ + { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" }, \ + { CXL_DER_VALID_COMPONENT, "COMPONENT" }, \ + { CXL_DER_VALID_COMPONENT_ID_FORMAT, "COMPONENT PLDM FORMAT" }, \ + { CXL_DER_VALID_SUB_CHANNEL, "SUB CHANNEL" } \ ) TRACE_EVENT(cxl_dram, @@ -447,6 +552,12 @@ TRACE_EVENT(cxl_dram, __field(u8, bank_group) /* Out of order to pack trace record */ __field(u8, bank) /* Out of order to pack trace record */ __field(u8, dpa_flags) /* Out of order to pack trace record */ + /* Following are out of order to pack trace record */ + __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE) + __field(u32, cvme_count) + __field(u8, sub_type) + __field(u8, sub_channel) + __field(u8, cme_threshold_ev_flags) __string(region_name, cxlr ? 
dev_name(&cxlr->dev) : "") ), @@ -460,6 +571,7 @@ TRACE_EVENT(cxl_dram, __entry->dpa &= CXL_DPA_MASK; __entry->descriptor = rec->media_hdr.descriptor; __entry->type = rec->media_hdr.type; + __entry->sub_type = rec->sub_type; __entry->transaction_type = rec->media_hdr.transaction_type; __entry->validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags); __entry->channel = rec->media_hdr.channel; @@ -479,30 +591,47 @@ TRACE_EVENT(cxl_dram, __assign_str(region_name); uuid_copy(&__entry->region_uuid, &uuid_null); } + memcpy(__entry->comp_id, &rec->component_id, + CXL_EVENT_GEN_MED_COMP_ID_SIZE); + __entry->sub_channel = rec->sub_channel; + __entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags; + __entry->cvme_count = get_unaligned_le24(rec->cvme_count); ), - CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' " \ + CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' sub_type='%s' " \ "transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \ "bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \ "validity_flags='%s' " \ - "hpa=%llx region=%s region_uuid=%pUb", + "comp_id=%s comp_id_pldm_valid_flags='%s' " \ + "pldm_entity_id=%s pldm_resource_id=%s " \ + "hpa=%llx region=%s region_uuid=%pUb " \ + "sub_channel=%u cme_threshold_ev_flags='%s' cvme_count=%u", __entry->dpa, show_dpa_flags(__entry->dpa_flags), show_event_desc_flags(__entry->descriptor), show_dram_mem_event_type(__entry->type), + show_mem_event_sub_type(__entry->sub_type), show_trans_type(__entry->transaction_type), __entry->channel, __entry->rank, __entry->nibble_mask, __entry->bank_group, __entry->bank, __entry->row, __entry->column, __print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE), show_dram_valid_flags(__entry->validity_flags), - __entry->hpa, __get_str(region_name), &__entry->region_uuid + __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE), + show_comp_id_pldm_flags(__entry->comp_id[0]), + show_pldm_entity_id(__entry->validity_flags, CXL_DER_VALID_COMPONENT, + CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + show_pldm_resource_id(__entry->validity_flags, CXL_DER_VALID_COMPONENT, + CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + __entry->hpa, __get_str(region_name), &__entry->region_uuid, + __entry->sub_channel, show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags), + __entry->cvme_count ) ); /* * Memory Module Event Record - MMER * - * CXL res 3.0 section 8.2.9.2.1.3; Table 8-45 + * CXL res 3.1 section 8.2.9.2.1.3; Table 8-47 */ #define CXL_MMER_HEALTH_STATUS_CHANGE 0x00 #define CXL_MMER_MEDIA_STATUS_CHANGE 0x01 @@ -510,27 +639,35 @@ TRACE_EVENT(cxl_dram, #define CXL_MMER_TEMP_CHANGE 0x03 #define CXL_MMER_DATA_PATH_ERROR 0x04 #define CXL_MMER_LSA_ERROR 0x05 +#define CXL_MMER_UNRECOV_SIDEBAND_BUS_ERROR 0x06 +#define CXL_MMER_MEMORY_MEDIA_FRU_ERROR 0x07 +#define CXL_MMER_POWER_MANAGEMENT_FAULT 0x08 #define show_dev_evt_type(type) __print_symbolic(type, \ { CXL_MMER_HEALTH_STATUS_CHANGE, "Health Status Change" }, \ { CXL_MMER_MEDIA_STATUS_CHANGE, "Media Status Change" }, \ { CXL_MMER_LIFE_USED_CHANGE, "Life Used Change" }, \ { CXL_MMER_TEMP_CHANGE, "Temperature Change" }, \ { CXL_MMER_DATA_PATH_ERROR, "Data Path Error" }, \ - { CXL_MMER_LSA_ERROR, "LSA Error" } \ + { CXL_MMER_LSA_ERROR, "LSA Error" }, \ + { CXL_MMER_UNRECOV_SIDEBAND_BUS_ERROR, "Unrecoverable Internal Sideband Bus Error" }, \ + { CXL_MMER_MEMORY_MEDIA_FRU_ERROR, "Memory Media FRU Error" }, \ + { CXL_MMER_POWER_MANAGEMENT_FAULT, "Power Management Fault" } \ ) /* * 
Device Health Information - DHI * - * CXL res 3.0 section 8.2.9.8.3.1; Table 8-100 + * CXL res 3.1 section 8.2.9.9.3.1; Table 8-133 */ #define CXL_DHI_HS_MAINTENANCE_NEEDED BIT(0) #define CXL_DHI_HS_PERFORMANCE_DEGRADED BIT(1) #define CXL_DHI_HS_HW_REPLACEMENT_NEEDED BIT(2) +#define CXL_DHI_HS_MEM_CAPACITY_DEGRADED BIT(3) #define show_health_status_flags(flags) __print_flags(flags, "|", \ { CXL_DHI_HS_MAINTENANCE_NEEDED, "MAINTENANCE_NEEDED" }, \ { CXL_DHI_HS_PERFORMANCE_DEGRADED, "PERFORMANCE_DEGRADED" }, \ - { CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" } \ + { CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" }, \ + { CXL_DHI_HS_MEM_CAPACITY_DEGRADED, "MEM_CAPACITY_DEGRADED" } \ ) #define CXL_DHI_MS_NORMAL 0x00 @@ -584,6 +721,26 @@ TRACE_EVENT(cxl_dram, #define CXL_DHI_AS_COR_VOL_ERR_CNT(as) ((as & 0x10) >> 4) #define CXL_DHI_AS_COR_PER_ERR_CNT(as) ((as & 0x20) >> 5) +#define CXL_MMER_VALID_COMPONENT BIT(0) +#define CXL_MMER_VALID_COMPONENT_ID_FORMAT BIT(1) +#define show_mem_module_valid_flags(flags) __print_flags(flags, "|", \ + { CXL_MMER_VALID_COMPONENT, "COMPONENT" }, \ + { CXL_MMER_VALID_COMPONENT_ID_FORMAT, "COMPONENT PLDM FORMAT" } \ +) +#define CXL_MMER_DEV_EVT_SUB_TYPE_NOT_REPORTED 0x00 +#define CXL_MMER_DEV_EVT_SUB_TYPE_INVALID_CONFIG_DATA 0x01 +#define CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_CONFIG_DATA 0x02 +#define CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_MEM_MEDIA_FRU 0x03 +#define show_dev_event_sub_type(sub_type) __print_symbolic(sub_type, \ + { CXL_MMER_DEV_EVT_SUB_TYPE_NOT_REPORTED, "Not Reported" }, \ + { CXL_MMER_DEV_EVT_SUB_TYPE_INVALID_CONFIG_DATA, "Invalid Config Data" }, \ + { CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_CONFIG_DATA, "Unsupported Config Data" }, \ + { \ + CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_MEM_MEDIA_FRU, \ + "Unsupported Memory Media FRU" \ + } \ +) + TRACE_EVENT(cxl_memory_module, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, @@ -606,6 +763,9 @@ TRACE_EVENT(cxl_memory_module, __field(u32, cor_per_err_cnt) __field(s16, device_temp) __field(u8, add_status) + __field(u8, event_sub_type) + __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE) + __field(u16, validity_flags) ), TP_fast_assign( @@ -614,6 +774,7 @@ TRACE_EVENT(cxl_memory_module, /* Memory Module Event */ __entry->event_type = rec->event_type; + __entry->event_sub_type = rec->event_sub_type; /* Device Health Info */ __entry->health_status = rec->info.health_status; @@ -624,13 +785,20 @@ TRACE_EVENT(cxl_memory_module, __entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt); __entry->device_temp = get_unaligned_le16(rec->info.device_temp); __entry->add_status = rec->info.add_status; + __entry->validity_flags = get_unaligned_le16(rec->validity_flags); + memcpy(__entry->comp_id, &rec->component_id, + CXL_EVENT_GEN_MED_COMP_ID_SIZE); ), - CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \ - "as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \ + CXL_EVT_TP_printk("event_type='%s' event_sub_type='%s' health_status='%s' " \ + "media_status='%s' as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \ "as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \ - "dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u", + "dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u " \ + "validity_flags='%s' " \ + "comp_id=%s comp_id_pldm_valid_flags='%s' " \ + "pldm_entity_id=%s pldm_resource_id=%s", show_dev_evt_type(__entry->event_type), + show_dev_event_sub_type(__entry->event_sub_type), show_health_status_flags(__entry->health_status), 
show_media_status(__entry->media_status), show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)), @@ -639,7 +807,14 @@ TRACE_EVENT(cxl_memory_module, show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)), __entry->life_used, __entry->device_temp, __entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt, - __entry->cor_per_err_cnt + __entry->cor_per_err_cnt, + show_mem_module_valid_flags(__entry->validity_flags), + __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE), + show_comp_id_pldm_flags(__entry->comp_id[0]), + show_pldm_entity_id(__entry->validity_flags, CXL_MMER_VALID_COMPONENT, + CXL_MMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + show_pldm_resource_id(__entry->validity_flags, CXL_MMER_VALID_COMPONENT, + CXL_MMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id) ) ); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index f6015f24ad38..bbbaa0d0a670 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -302,10 +302,11 @@ int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs); int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs); +#define CXL_INSTANCES_COUNT -1 enum cxl_regloc_type; int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type); int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map, int index); + struct cxl_register_map *map, unsigned int index); int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map); int cxl_setup_regs(struct cxl_register_map *map); @@ -821,7 +822,8 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info); int devm_cxl_add_passthrough_decoder(struct cxl_port *port); -int cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, +struct cxl_dev_state; +int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info); bool is_cxl_region(struct device *dev); @@ -864,7 +866,6 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_port *port); struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm(struct device *dev); -bool is_cxl_nvdimm_bridge(struct device *dev); int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd); struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 6d94ff4a4f1a..a96e54c6259e 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -907,7 +907,8 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct cxl_dev_state *cxlds; struct cxl_register_map map; struct cxl_memdev *cxlmd; - int i, rc, pmu_count; + int rc, pmu_count; + unsigned int i; bool irq_avail; /* @@ -1009,6 +1010,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU); + if (pmu_count < 0) + return pmu_count; + for (i = 0; i < pmu_count; i++) { struct cxl_pmu_regs pmu_regs; diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 4c83f6a22e58..d2bfd1ff5492 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -98,7 +98,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) struct cxl_port *root; int rc; - rc = cxl_dvsec_rr_decode(cxlds->dev, port, &info); + rc = cxl_dvsec_rr_decode(cxlds, &info); if (rc < 0) return rc; diff --git 
a/drivers/dma/Kconfig b/drivers/dma/Kconfig index e994d6e0779e..8afea2e23360 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -162,8 +162,8 @@ config DMA_SA11X0 config DMA_SUN4I tristate "Allwinner A10 DMA SoCs support" - depends on MACH_SUN4I || MACH_SUN5I || MACH_SUN7I - default (MACH_SUN4I || MACH_SUN5I || MACH_SUN7I) + depends on MACH_SUN4I || MACH_SUN5I || MACH_SUN7I || MACH_SUNIV + default (MACH_SUN4I || MACH_SUN5I || MACH_SUN7I || MACH_SUNIV) select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help @@ -740,8 +740,6 @@ source "drivers/dma/bestcomm/Kconfig" source "drivers/dma/mediatek/Kconfig" -source "drivers/dma/ptdma/Kconfig" - source "drivers/dma/qcom/Kconfig" source "drivers/dma/dw/Kconfig" diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 5b2a52f4f2ee..19ba465011a6 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -16,7 +16,6 @@ obj-$(CONFIG_DMATEST) += dmatest.o obj-$(CONFIG_ALTERA_MSGDMA) += altera-msgdma.o obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ -obj-$(CONFIG_AMD_PTDMA) += ptdma/ obj-$(CONFIG_APPLE_ADMAC) += apple-admac.o obj-$(CONFIG_AT_HDMAC) += at_hdmac.o obj-$(CONFIG_AT_XDMAC) += at_xdmac.o diff --git a/drivers/dma/amd/Kconfig b/drivers/dma/amd/Kconfig index 7d1f51d69675..00d874872a8f 100644 --- a/drivers/dma/amd/Kconfig +++ b/drivers/dma/amd/Kconfig @@ -1,4 +1,32 @@ # SPDX-License-Identifier: GPL-2.0-only +# + +config AMD_AE4DMA + tristate "AMD AE4DMA Engine" + depends on (X86_64 || COMPILE_TEST) && PCI + depends on AMD_PTDMA + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the AMD AE4DMA controller. This controller + provides DMA capabilities to perform high bandwidth memory to + memory and IO copy operations. It performs DMA transfer through + queue-based descriptor management. This DMA controller is intended + to be used with AMD Non-Transparent Bridge devices and not for + general purpose peripheral DMA. + +config AMD_PTDMA + tristate "AMD PassThru DMA Engine" + depends on X86_64 && PCI + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the AMD PTDMA controller. This controller + provides DMA capabilities to perform high bandwidth memory to + memory and IO copy operations. It performs DMA transfer through + queue-based descriptor management. This DMA controller is intended + to be used with AMD Non-Transparent Bridge devices and not for + general purpose peripheral DMA. config AMD_QDMA tristate "AMD Queue-based DMA" diff --git a/drivers/dma/amd/Makefile b/drivers/dma/amd/Makefile index 37212be9364f..11278c06374d 100644 --- a/drivers/dma/amd/Makefile +++ b/drivers/dma/amd/Makefile @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_AMD_AE4DMA) += ae4dma/ +obj-$(CONFIG_AMD_PTDMA) += ptdma/ obj-$(CONFIG_AMD_QDMA) += qdma/ diff --git a/drivers/dma/amd/ae4dma/Makefile b/drivers/dma/amd/ae4dma/Makefile new file mode 100644 index 000000000000..e918f85a80ec --- /dev/null +++ b/drivers/dma/amd/ae4dma/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# AMD AE4DMA driver +# + +obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o + +ae4dma-objs := ae4dma-dev.o + +ae4dma-$(CONFIG_PCI) += ae4dma-pci.o diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c b/drivers/dma/amd/ae4dma/ae4dma-dev.c new file mode 100644 index 000000000000..8de3bef41b58 --- /dev/null +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD AE4DMA driver + * + * Copyright (c) 2024, Advanced Micro Devices, Inc. 
+ * All Rights Reserved. + * + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com> + */ + +#include "ae4dma.h" + +static unsigned int max_hw_q = 1; +module_param(max_hw_q, uint, 0444); +MODULE_PARM_DESC(max_hw_q, "max hw queues supported by engine (any non-zero value, default: 1)"); + +static void ae4_pending_work(struct work_struct *work) +{ + struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct ae4_cmd_queue, p_work.work); + struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q; + struct pt_cmd *cmd; + u32 cridx; + + for (;;) { + wait_event_interruptible(ae4cmd_q->q_w, + ((atomic64_read(&ae4cmd_q->done_cnt)) < + atomic64_read(&ae4cmd_q->intr_cnt))); + + atomic64_inc(&ae4cmd_q->done_cnt); + + mutex_lock(&ae4cmd_q->cmd_lock); + cridx = readl(cmd_q->reg_control + AE4_RD_IDX_OFF); + while ((ae4cmd_q->dridx != cridx) && !list_empty(&ae4cmd_q->cmd)) { + cmd = list_first_entry(&ae4cmd_q->cmd, struct pt_cmd, entry); + list_del(&cmd->entry); + + ae4_check_status_error(ae4cmd_q, ae4cmd_q->dridx); + cmd->pt_cmd_callback(cmd->data, cmd->ret); + + ae4cmd_q->q_cmd_count--; + ae4cmd_q->dridx = (ae4cmd_q->dridx + 1) % CMD_Q_LEN; + + complete_all(&ae4cmd_q->cmp); + } + mutex_unlock(&ae4cmd_q->cmd_lock); + } +} + +static irqreturn_t ae4_core_irq_handler(int irq, void *data) +{ + struct ae4_cmd_queue *ae4cmd_q = data; + struct pt_cmd_queue *cmd_q; + struct pt_device *pt; + u32 status; + + cmd_q = &ae4cmd_q->cmd_q; + pt = cmd_q->pt; + + pt->total_interrupts++; + atomic64_inc(&ae4cmd_q->intr_cnt); + + status = readl(cmd_q->reg_control + AE4_INTR_STS_OFF); + if (status & BIT(0)) { + status &= GENMASK(31, 1); + writel(status, cmd_q->reg_control + AE4_INTR_STS_OFF); + } + + wake_up(&ae4cmd_q->q_w); + + return IRQ_HANDLED; +} + +void ae4_destroy_work(struct ae4_device *ae4) +{ + struct ae4_cmd_queue *ae4cmd_q; + int i; + + for (i = 0; i < ae4->cmd_q_count; i++) { + ae4cmd_q = &ae4->ae4cmd_q[i]; + + if (!ae4cmd_q->pws) + break; + + cancel_delayed_work_sync(&ae4cmd_q->p_work); + destroy_workqueue(ae4cmd_q->pws); + } +} + +int ae4_core_init(struct ae4_device *ae4) +{ + struct pt_device *pt = &ae4->pt; + struct ae4_cmd_queue *ae4cmd_q; + struct device *dev = pt->dev; + struct pt_cmd_queue *cmd_q; + int i, ret = 0; + + writel(max_hw_q, pt->io_regs); + + for (i = 0; i < max_hw_q; i++) { + ae4cmd_q = &ae4->ae4cmd_q[i]; + ae4cmd_q->id = ae4->cmd_q_count; + ae4->cmd_q_count++; + + cmd_q = &ae4cmd_q->cmd_q; + cmd_q->pt = pt; + + cmd_q->reg_control = pt->io_regs + ((i + 1) * AE4_Q_SZ); + + ret = devm_request_irq(dev, ae4->ae4_irq[i], ae4_core_irq_handler, 0, + dev_name(pt->dev), ae4cmd_q); + if (ret) + return ret; + + cmd_q->qsize = Q_SIZE(sizeof(struct ae4dma_desc)); + + cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize, &cmd_q->qbase_dma, + GFP_KERNEL); + if (!cmd_q->qbase) + return -ENOMEM; + } + + for (i = 0; i < ae4->cmd_q_count; i++) { + ae4cmd_q = &ae4->ae4cmd_q[i]; + + cmd_q = &ae4cmd_q->cmd_q; + + cmd_q->reg_control = pt->io_regs + ((i + 1) * AE4_Q_SZ); + + /* Update the device registers with queue information. 
*/ + writel(CMD_Q_LEN, cmd_q->reg_control + AE4_MAX_IDX_OFF); + + cmd_q->qdma_tail = cmd_q->qbase_dma; + writel(lower_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + AE4_Q_BASE_L_OFF); + writel(upper_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + AE4_Q_BASE_H_OFF); + + INIT_LIST_HEAD(&ae4cmd_q->cmd); + init_waitqueue_head(&ae4cmd_q->q_w); + + ae4cmd_q->pws = alloc_ordered_workqueue("ae4dma_%d", WQ_MEM_RECLAIM, ae4cmd_q->id); + if (!ae4cmd_q->pws) { + ae4_destroy_work(ae4); + return -ENOMEM; + } + INIT_DELAYED_WORK(&ae4cmd_q->p_work, ae4_pending_work); + queue_delayed_work(ae4cmd_q->pws, &ae4cmd_q->p_work, usecs_to_jiffies(100)); + + init_completion(&ae4cmd_q->cmp); + } + + ret = pt_dmaengine_register(pt); + if (ret) + ae4_destroy_work(ae4); + else + ptdma_debugfs_setup(pt); + + return ret; +} diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c new file mode 100644 index 000000000000..aad0dc4294a3 --- /dev/null +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD AE4DMA driver + * + * Copyright (c) 2024, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com> + */ + +#include "ae4dma.h" + +static int ae4_get_irqs(struct ae4_device *ae4) +{ + struct ae4_msix *ae4_msix = ae4->ae4_msix; + struct pt_device *pt = &ae4->pt; + struct device *dev = pt->dev; + struct pci_dev *pdev; + int i, v, ret; + + pdev = to_pci_dev(dev); + + for (v = 0; v < ARRAY_SIZE(ae4_msix->msix_entry); v++) + ae4_msix->msix_entry[v].entry = v; + + ret = pci_alloc_irq_vectors(pdev, v, v, PCI_IRQ_MSIX); + if (ret != v) { + if (ret > 0) + pci_free_irq_vectors(pdev); + + dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret); + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); + if (ret < 0) { + dev_err(dev, "could not enable MSI (%d)\n", ret); + return ret; + } + + ret = pci_irq_vector(pdev, 0); + if (ret < 0) { + pci_free_irq_vectors(pdev); + return ret; + } + + for (i = 0; i < MAX_AE4_HW_QUEUES; i++) + ae4->ae4_irq[i] = ret; + + } else { + ae4_msix->msix_count = ret; + for (i = 0; i < MAX_AE4_HW_QUEUES; i++) + ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector; + } + + return ret; +} + +static void ae4_free_irqs(struct ae4_device *ae4) +{ + struct ae4_msix *ae4_msix = ae4->ae4_msix; + struct pt_device *pt = &ae4->pt; + struct device *dev = pt->dev; + struct pci_dev *pdev; + + pdev = to_pci_dev(dev); + + if (ae4_msix && (ae4_msix->msix_count || ae4->ae4_irq[MAX_AE4_HW_QUEUES - 1])) + pci_free_irq_vectors(pdev); +} + +static void ae4_deinit(struct ae4_device *ae4) +{ + ae4_free_irqs(ae4); +} + +static int ae4_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct device *dev = &pdev->dev; + struct ae4_device *ae4; + struct pt_device *pt; + int bar_mask; + int ret = 0; + + ae4 = devm_kzalloc(dev, sizeof(*ae4), GFP_KERNEL); + if (!ae4) + return -ENOMEM; + + ae4->ae4_msix = devm_kzalloc(dev, sizeof(struct ae4_msix), GFP_KERNEL); + if (!ae4->ae4_msix) + return -ENOMEM; + + ret = pcim_enable_device(pdev); + if (ret) + goto ae4_error; + + bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); + ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma"); + if (ret) + goto ae4_error; + + pt = &ae4->pt; + pt->dev = dev; + pt->ver = AE4_DMA_VERSION; + + pt->io_regs = pcim_iomap_table(pdev)[0]; + if (!pt->io_regs) { + ret = -ENOMEM; + goto ae4_error; + } + + ret = ae4_get_irqs(ae4); + if (ret < 0) + goto ae4_error; + + pci_set_master(pdev); + + 
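/*
 * [Editor's sketch, not part of the patch.] ae4_get_irqs() above implements
 * the MSI-X-then-MSI fallback by hand. The same policy can be expressed with
 * a single pci_alloc_irq_vectors() call that is allowed to degrade from
 * MSI-X to MSI; this only illustrates the generic PCI API, the helper name
 * is made up and the driver keeps its explicit version.
 */
static int demo_alloc_vectors(struct pci_dev *pdev, unsigned int want)
{
	int nvec;

	/* Accept anything from 'want' MSI-X vectors down to a single MSI. */
	nvec = pci_alloc_irq_vectors(pdev, 1, want, PCI_IRQ_MSIX | PCI_IRQ_MSI);
	if (nvec < 0)
		return nvec;

	/* Translate vector 0 into the Linux IRQ number to request. */
	return pci_irq_vector(pdev, 0);
}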
dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + + dev_set_drvdata(dev, ae4); + + ret = ae4_core_init(ae4); + if (ret) + goto ae4_error; + + return 0; + +ae4_error: + ae4_deinit(ae4); + + return ret; +} + +static void ae4_pci_remove(struct pci_dev *pdev) +{ + struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev); + + ae4_destroy_work(ae4); + ae4_deinit(ae4); +} + +static const struct pci_device_id ae4_pci_table[] = { + { PCI_VDEVICE(AMD, 0x14C8), }, + { PCI_VDEVICE(AMD, 0x14DC), }, + { PCI_VDEVICE(AMD, 0x149B), }, + /* Last entry must be zero */ + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ae4_pci_table); + +static struct pci_driver ae4_pci_driver = { + .name = "ae4dma", + .id_table = ae4_pci_table, + .probe = ae4_pci_probe, + .remove = ae4_pci_remove, +}; + +module_pci_driver(ae4_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("AMD AE4DMA driver"); diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h new file mode 100644 index 000000000000..265c5d436008 --- /dev/null +++ b/drivers/dma/amd/ae4dma/ae4dma.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD AE4DMA driver + * + * Copyright (c) 2024, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com> + */ +#ifndef __AE4DMA_H__ +#define __AE4DMA_H__ + +#include <linux/device.h> +#include <linux/dmaengine.h> +#include <linux/dmapool.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/wait.h> + +#include "../ptdma/ptdma.h" +#include "../../virt-dma.h" + +#define MAX_AE4_HW_QUEUES 16 + +#define AE4_DESC_COMPLETED 0x03 + +#define AE4_MAX_IDX_OFF 0x08 +#define AE4_RD_IDX_OFF 0x0c +#define AE4_WR_IDX_OFF 0x10 +#define AE4_INTR_STS_OFF 0x14 +#define AE4_Q_BASE_L_OFF 0x18 +#define AE4_Q_BASE_H_OFF 0x1c +#define AE4_Q_SZ 0x20 + +#define AE4_DMA_VERSION 4 +#define CMD_AE4_DESC_DW0_VAL 2 + +struct ae4_msix { + int msix_count; + struct msix_entry msix_entry[MAX_AE4_HW_QUEUES]; +}; + +struct ae4_cmd_queue { + struct ae4_device *ae4; + struct pt_cmd_queue cmd_q; + struct list_head cmd; + /* protect command operations */ + struct mutex cmd_lock; + struct delayed_work p_work; + struct workqueue_struct *pws; + struct completion cmp; + wait_queue_head_t q_w; + atomic64_t intr_cnt; + atomic64_t done_cnt; + u64 q_cmd_count; + u32 dridx; + u32 tail_wi; + u32 id; +}; + +union dwou { + u32 dw0; + struct dword0 { + u8 byte0; + u8 byte1; + u16 timestamp; + } dws; +}; + +struct dword1 { + u8 status; + u8 err_code; + u16 desc_id; +}; + +struct ae4dma_desc { + union dwou dwouv; + struct dword1 dw1; + u32 length; + u32 rsvd; + u32 src_hi; + u32 src_lo; + u32 dst_hi; + u32 dst_lo; +}; + +struct ae4_device { + struct pt_device pt; + struct ae4_msix *ae4_msix; + struct ae4_cmd_queue ae4cmd_q[MAX_AE4_HW_QUEUES]; + unsigned int ae4_irq[MAX_AE4_HW_QUEUES]; + unsigned int cmd_q_count; +}; + +int ae4_core_init(struct ae4_device *ae4); +void ae4_destroy_work(struct ae4_device *ae4); +void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx); +#endif diff --git a/drivers/dma/ptdma/Makefile b/drivers/dma/amd/ptdma/Makefile index ce5410268a9a..ce5410268a9a 100644 --- a/drivers/dma/ptdma/Makefile +++ b/drivers/dma/amd/ptdma/Makefile diff --git a/drivers/dma/ptdma/ptdma-debugfs.c b/drivers/dma/amd/ptdma/ptdma-debugfs.c index c8307d3044a3..c7c90bbf6fd8 100644 --- a/drivers/dma/ptdma/ptdma-debugfs.c +++ b/drivers/dma/amd/ptdma/ptdma-debugfs.c @@ -13,6 +13,7 @@ #include <linux/seq_file.h> 
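/*
 * [Editor's sketch, not part of the patch.] The ae4dma.h layout above
 * describes a simple per-queue ring: software places descriptors at tail_wi
 * and publishes that index through AE4_WR_IDX_OFF, the engine advances
 * AE4_RD_IDX_OFF as it consumes them, and the driver's pending work walks
 * its dridx up to the hardware read index. A condensed consumer loop might
 * look like this; complete_one() is a hypothetical stand-in for popping and
 * completing a pt_cmd.
 */
static void demo_harvest_queue(struct ae4_cmd_queue *ae4cmd_q)
{
	struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
	u32 cridx = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);

	while (ae4cmd_q->dridx != cridx) {
		complete_one(ae4cmd_q, ae4cmd_q->dridx);	/* hypothetical */
		ae4cmd_q->dridx = (ae4cmd_q->dridx + 1) % CMD_Q_LEN;
	}
}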
#include "ptdma.h" +#include "../ae4dma/ae4dma.h" /* DebugFS helpers */ #define RI_VERSION_NUM 0x0000003F @@ -23,11 +24,19 @@ static int pt_debugfs_info_show(struct seq_file *s, void *p) { struct pt_device *pt = s->private; + struct ae4_device *ae4; unsigned int regval; seq_printf(s, "Device name: %s\n", dev_name(pt->dev)); - seq_printf(s, " # Queues: %d\n", 1); - seq_printf(s, " # Cmds: %d\n", pt->cmd_count); + + if (pt->ver == AE4_DMA_VERSION) { + ae4 = container_of(pt, struct ae4_device, pt); + seq_printf(s, " # Queues: %d\n", ae4->cmd_q_count); + seq_printf(s, " # Cmds per queue: %d\n", CMD_Q_LEN); + } else { + seq_printf(s, " # Queues: %d\n", 1); + seq_printf(s, " # Cmds: %d\n", pt->cmd_count); + } regval = ioread32(pt->io_regs + CMD_PT_VERSION); @@ -55,6 +64,7 @@ static int pt_debugfs_stats_show(struct seq_file *s, void *p) static int pt_debugfs_queue_show(struct seq_file *s, void *p) { struct pt_cmd_queue *cmd_q = s->private; + struct pt_device *pt; unsigned int regval; if (!cmd_q) @@ -62,18 +72,24 @@ static int pt_debugfs_queue_show(struct seq_file *s, void *p) seq_printf(s, " Pass-Thru: %ld\n", cmd_q->total_pt_ops); - regval = ioread32(cmd_q->reg_control + 0x000C); - - seq_puts(s, " Enabled Interrupts:"); - if (regval & INT_EMPTY_QUEUE) - seq_puts(s, " EMPTY"); - if (regval & INT_QUEUE_STOPPED) - seq_puts(s, " STOPPED"); - if (regval & INT_ERROR) - seq_puts(s, " ERROR"); - if (regval & INT_COMPLETION) - seq_puts(s, " COMPLETION"); - seq_puts(s, "\n"); + pt = cmd_q->pt; + if (pt->ver == AE4_DMA_VERSION) { + regval = readl(cmd_q->reg_control + 0x4); + seq_printf(s, " Enabled Interrupts:: status 0x%x\n", regval); + } else { + regval = ioread32(cmd_q->reg_control + 0x000C); + + seq_puts(s, " Enabled Interrupts:"); + if (regval & INT_EMPTY_QUEUE) + seq_puts(s, " EMPTY"); + if (regval & INT_QUEUE_STOPPED) + seq_puts(s, " STOPPED"); + if (regval & INT_ERROR) + seq_puts(s, " ERROR"); + if (regval & INT_COMPLETION) + seq_puts(s, " COMPLETION"); + seq_puts(s, "\n"); + } return 0; } @@ -84,8 +100,12 @@ DEFINE_SHOW_ATTRIBUTE(pt_debugfs_stats); void ptdma_debugfs_setup(struct pt_device *pt) { - struct pt_cmd_queue *cmd_q; struct dentry *debugfs_q_instance; + struct ae4_cmd_queue *ae4cmd_q; + struct pt_cmd_queue *cmd_q; + struct ae4_device *ae4; + char name[30]; + int i; if (!debugfs_initialized()) return; @@ -96,11 +116,28 @@ void ptdma_debugfs_setup(struct pt_device *pt) debugfs_create_file("stats", 0400, pt->dma_dev.dbg_dev_root, pt, &pt_debugfs_stats_fops); - cmd_q = &pt->cmd_q; - - debugfs_q_instance = - debugfs_create_dir("q", pt->dma_dev.dbg_dev_root); - debugfs_create_file("stats", 0400, debugfs_q_instance, cmd_q, - &pt_debugfs_queue_fops); + if (pt->ver == AE4_DMA_VERSION) { + ae4 = container_of(pt, struct ae4_device, pt); + for (i = 0; i < ae4->cmd_q_count; i++) { + ae4cmd_q = &ae4->ae4cmd_q[i]; + cmd_q = &ae4cmd_q->cmd_q; + + memset(name, 0, sizeof(name)); + snprintf(name, 29, "q%d", ae4cmd_q->id); + + debugfs_q_instance = + debugfs_create_dir(name, pt->dma_dev.dbg_dev_root); + + debugfs_create_file("stats", 0400, debugfs_q_instance, cmd_q, + &pt_debugfs_queue_fops); + } + } else { + debugfs_q_instance = + debugfs_create_dir("q", pt->dma_dev.dbg_dev_root); + cmd_q = &pt->cmd_q; + debugfs_create_file("stats", 0400, debugfs_q_instance, cmd_q, + &pt_debugfs_queue_fops); + } } +EXPORT_SYMBOL_GPL(ptdma_debugfs_setup); diff --git a/drivers/dma/ptdma/ptdma-dev.c b/drivers/dma/amd/ptdma/ptdma-dev.c index a2bf13ff18b6..a2bf13ff18b6 100644 --- a/drivers/dma/ptdma/ptdma-dev.c +++ 
b/drivers/dma/amd/ptdma/ptdma-dev.c diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c index f79240734807..35c84ec9608b 100644 --- a/drivers/dma/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c @@ -9,9 +9,58 @@ * Author: Gary R Hook <gary.hook@amd.com> */ +#include <linux/bitfield.h> #include "ptdma.h" -#include "../dmaengine.h" -#include "../virt-dma.h" +#include "../ae4dma/ae4dma.h" +#include "../../dmaengine.h" + +static char *ae4_error_codes[] = { + "", + "ERR 01: INVALID HEADER DW0", + "ERR 02: INVALID STATUS", + "ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT", + "ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT", + "ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT", + "ERR 06: INVALID ALIGNMENT", + "ERR 07: INVALID DESCRIPTOR", +}; + +static void ae4_log_error(struct pt_device *d, int e) +{ + /* ERR 01 - 07 represents Invalid AE4 errors */ + if (e <= 7) + dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", ae4_error_codes[e], e); + /* ERR 08 - 15 represents Invalid Descriptor errors */ + else if (e > 7 && e <= 15) + dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e); + /* ERR 16 - 31 represents Firmware errors */ + else if (e > 15 && e <= 31) + dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FIRMWARE ERROR", e); + /* ERR 32 - 63 represents Fatal errors */ + else if (e > 31 && e <= 63) + dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FATAL ERROR", e); + /* ERR 64 - 255 represents PTE errors */ + else if (e > 63 && e <= 255) + dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e); + else + dev_info(d->dev, "Unknown AE4DMA error"); +} + +void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx) +{ + struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q; + struct ae4dma_desc desc; + u8 status; + + memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct ae4dma_desc)); + status = desc.dw1.status; + if (status && status != AE4_DESC_COMPLETED) { + cmd_q->cmd_error = desc.dw1.err_code; + if (cmd_q->cmd_error) + ae4_log_error(cmd_q->pt, cmd_q->cmd_error); + } +} +EXPORT_SYMBOL_GPL(ae4_check_status_error); static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan) { @@ -45,7 +94,71 @@ static void pt_do_cleanup(struct virt_dma_desc *vd) kmem_cache_free(pt->dma_desc_cache, desc); } -static int pt_dma_start_desc(struct pt_dma_desc *desc) +static struct pt_cmd_queue *pt_get_cmd_queue(struct pt_device *pt, struct pt_dma_chan *chan) +{ + struct ae4_cmd_queue *ae4cmd_q; + struct pt_cmd_queue *cmd_q; + struct ae4_device *ae4; + + if (pt->ver == AE4_DMA_VERSION) { + ae4 = container_of(pt, struct ae4_device, pt); + ae4cmd_q = &ae4->ae4cmd_q[chan->id]; + cmd_q = &ae4cmd_q->cmd_q; + } else { + cmd_q = &pt->cmd_q; + } + + return cmd_q; +} + +static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct ae4_cmd_queue *ae4cmd_q) +{ + bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0); + struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q; + + if (soc) { + desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc->dwouv.dw0); + desc->dwouv.dw0 &= ~DWORD0_SOC; + } + + mutex_lock(&ae4cmd_q->cmd_lock); + memcpy(&cmd_q->qbase[ae4cmd_q->tail_wi], desc, sizeof(struct ae4dma_desc)); + ae4cmd_q->q_cmd_count++; + ae4cmd_q->tail_wi = (ae4cmd_q->tail_wi + 1) % CMD_Q_LEN; + writel(ae4cmd_q->tail_wi, cmd_q->reg_control + AE4_WR_IDX_OFF); + mutex_unlock(&ae4cmd_q->cmd_lock); + + wake_up(&ae4cmd_q->q_w); + + return 0; +} + +static int pt_core_perform_passthru_ae4(struct pt_cmd_queue *cmd_q, + struct pt_passthru_engine *pt_engine) +{ + struct ae4_cmd_queue *ae4cmd_q 
= container_of(cmd_q, struct ae4_cmd_queue, cmd_q); + struct ae4dma_desc desc; + + cmd_q->cmd_error = 0; + cmd_q->total_pt_ops++; + memset(&desc, 0, sizeof(desc)); + desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL; + + desc.dw1.status = 0; + desc.dw1.err_code = 0; + desc.dw1.desc_id = 0; + + desc.length = pt_engine->src_len; + + desc.src_lo = upper_32_bits(pt_engine->src_dma); + desc.src_hi = lower_32_bits(pt_engine->src_dma); + desc.dst_lo = upper_32_bits(pt_engine->dst_dma); + desc.dst_hi = lower_32_bits(pt_engine->dst_dma); + + return ae4_core_execute_cmd(&desc, ae4cmd_q); +} + +static int pt_dma_start_desc(struct pt_dma_desc *desc, struct pt_dma_chan *chan) { struct pt_passthru_engine *pt_engine; struct pt_device *pt; @@ -56,13 +169,18 @@ static int pt_dma_start_desc(struct pt_dma_desc *desc) pt_cmd = &desc->pt_cmd; pt = pt_cmd->pt; - cmd_q = &pt->cmd_q; + + cmd_q = pt_get_cmd_queue(pt, chan); + pt_engine = &pt_cmd->passthru; pt->tdata.cmd = pt_cmd; /* Execute the command */ - pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine); + if (pt->ver == AE4_DMA_VERSION) + pt_cmd->ret = pt_core_perform_passthru_ae4(cmd_q, pt_engine); + else + pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine); return 0; } @@ -151,7 +269,7 @@ static void pt_cmd_callback(void *data, int err) if (!desc) break; - ret = pt_dma_start_desc(desc); + ret = pt_dma_start_desc(desc, chan); if (!ret) break; @@ -186,7 +304,10 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan, { struct pt_dma_chan *chan = to_pt_chan(dma_chan); struct pt_passthru_engine *pt_engine; + struct pt_device *pt = chan->pt; + struct ae4_cmd_queue *ae4cmd_q; struct pt_dma_desc *desc; + struct ae4_device *ae4; struct pt_cmd *pt_cmd; desc = pt_alloc_dma_desc(chan, flags); @@ -194,7 +315,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan, return NULL; pt_cmd = &desc->pt_cmd; - pt_cmd->pt = chan->pt; + pt_cmd->pt = pt; pt_engine = &pt_cmd->passthru; pt_cmd->engine = PT_ENGINE_PASSTHRU; pt_engine->src_dma = src; @@ -205,6 +326,14 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan, desc->len = len; + if (pt->ver == AE4_DMA_VERSION) { + ae4 = container_of(pt, struct ae4_device, pt); + ae4cmd_q = &ae4->ae4cmd_q[chan->id]; + mutex_lock(&ae4cmd_q->cmd_lock); + list_add_tail(&pt_cmd->entry, &ae4cmd_q->cmd); + mutex_unlock(&ae4cmd_q->cmd_lock); + } + return desc; } @@ -258,24 +387,43 @@ static void pt_issue_pending(struct dma_chan *dma_chan) pt_cmd_callback(desc, 0); } +static void pt_check_status_trans_ae4(struct pt_device *pt, struct pt_cmd_queue *cmd_q) +{ + struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct ae4_cmd_queue, cmd_q); + int i; + + for (i = 0; i < CMD_Q_LEN; i++) + ae4_check_status_error(ae4cmd_q, i); +} + static enum dma_status pt_tx_status(struct dma_chan *c, dma_cookie_t cookie, struct dma_tx_state *txstate) { - struct pt_device *pt = to_pt_chan(c)->pt; - struct pt_cmd_queue *cmd_q = &pt->cmd_q; + struct pt_dma_chan *chan = to_pt_chan(c); + struct pt_device *pt = chan->pt; + struct pt_cmd_queue *cmd_q; + + cmd_q = pt_get_cmd_queue(pt, chan); + + if (pt->ver == AE4_DMA_VERSION) + pt_check_status_trans_ae4(pt, cmd_q); + else + pt_check_status_trans(pt, cmd_q); - pt_check_status_trans(pt, cmd_q); return dma_cookie_status(c, cookie, txstate); } static int pt_pause(struct dma_chan *dma_chan) { struct pt_dma_chan *chan = to_pt_chan(dma_chan); + struct pt_device *pt = chan->pt; + struct pt_cmd_queue *cmd_q; unsigned long flags; spin_lock_irqsave(&chan->vc.lock, flags); - 
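/*
 * [Editor's sketch, not part of the patch.] Once pt_dmaengine_register() has
 * registered the channels, a consumer only sees the generic dmaengine API;
 * nothing below is ptdma/ae4dma specific. A minimal busy-wait memcpy client,
 * with error handling trimmed for brevity:
 */
static int demo_dma_memcpy(dma_addr_t dst, dma_addr_t src, size_t len)
{
	struct dma_async_tx_descriptor *tx;
	struct dma_chan *chan;
	dma_cookie_t cookie;
	dma_cap_mask_t mask;

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);
	chan = dma_request_chan_by_mask(&mask);
	if (IS_ERR(chan))
		return PTR_ERR(chan);

	tx = dmaengine_prep_dma_memcpy(chan, dst, src, len, DMA_PREP_INTERRUPT);
	if (!tx) {
		dma_release_channel(chan);
		return -ENOMEM;
	}

	cookie = dmaengine_submit(tx);
	dma_async_issue_pending(chan);
	dma_sync_wait(chan, cookie);	/* polling wait keeps the example short */

	dma_release_channel(chan);
	return 0;
}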
pt_stop_queue(&chan->pt->cmd_q); + cmd_q = pt_get_cmd_queue(pt, chan); + pt_stop_queue(cmd_q); spin_unlock_irqrestore(&chan->vc.lock, flags); return 0; @@ -285,10 +433,13 @@ static int pt_resume(struct dma_chan *dma_chan) { struct pt_dma_chan *chan = to_pt_chan(dma_chan); struct pt_dma_desc *desc = NULL; + struct pt_device *pt = chan->pt; + struct pt_cmd_queue *cmd_q; unsigned long flags; spin_lock_irqsave(&chan->vc.lock, flags); - pt_start_queue(&chan->pt->cmd_q); + cmd_q = pt_get_cmd_queue(pt, chan); + pt_start_queue(cmd_q); desc = pt_next_dma_desc(chan); spin_unlock_irqrestore(&chan->vc.lock, flags); @@ -302,11 +453,17 @@ static int pt_resume(struct dma_chan *dma_chan) static int pt_terminate_all(struct dma_chan *dma_chan) { struct pt_dma_chan *chan = to_pt_chan(dma_chan); + struct pt_device *pt = chan->pt; + struct pt_cmd_queue *cmd_q; unsigned long flags; - struct pt_cmd_queue *cmd_q = &chan->pt->cmd_q; LIST_HEAD(head); - iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010); + cmd_q = pt_get_cmd_queue(pt, chan); + if (pt->ver == AE4_DMA_VERSION) + pt_stop_queue(cmd_q); + else + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010); + spin_lock_irqsave(&chan->vc.lock, flags); vchan_get_all_descriptors(&chan->vc, &head); spin_unlock_irqrestore(&chan->vc.lock, flags); @@ -319,14 +476,24 @@ static int pt_terminate_all(struct dma_chan *dma_chan) int pt_dmaengine_register(struct pt_device *pt) { - struct pt_dma_chan *chan; struct dma_device *dma_dev = &pt->dma_dev; - char *cmd_cache_name; + struct ae4_cmd_queue *ae4cmd_q = NULL; + struct ae4_device *ae4 = NULL; + struct pt_dma_chan *chan; char *desc_cache_name; - int ret; + char *cmd_cache_name; + int ret, i; + + if (pt->ver == AE4_DMA_VERSION) + ae4 = container_of(pt, struct ae4_device, pt); + + if (ae4) + pt->pt_dma_chan = devm_kcalloc(pt->dev, ae4->cmd_q_count, + sizeof(*pt->pt_dma_chan), GFP_KERNEL); + else + pt->pt_dma_chan = devm_kzalloc(pt->dev, sizeof(*pt->pt_dma_chan), + GFP_KERNEL); - pt->pt_dma_chan = devm_kzalloc(pt->dev, sizeof(*pt->pt_dma_chan), - GFP_KERNEL); if (!pt->pt_dma_chan) return -ENOMEM; @@ -368,9 +535,6 @@ int pt_dmaengine_register(struct pt_device *pt) INIT_LIST_HEAD(&dma_dev->channels); - chan = pt->pt_dma_chan; - chan->pt = pt; - /* Set base and prep routines */ dma_dev->device_free_chan_resources = pt_free_chan_resources; dma_dev->device_prep_dma_memcpy = pt_prep_dma_memcpy; @@ -382,8 +546,21 @@ int pt_dmaengine_register(struct pt_device *pt) dma_dev->device_terminate_all = pt_terminate_all; dma_dev->device_synchronize = pt_synchronize; - chan->vc.desc_free = pt_do_cleanup; - vchan_init(&chan->vc, dma_dev); + if (ae4) { + for (i = 0; i < ae4->cmd_q_count; i++) { + chan = pt->pt_dma_chan + i; + ae4cmd_q = &ae4->ae4cmd_q[i]; + chan->id = ae4cmd_q->id; + chan->pt = pt; + chan->vc.desc_free = pt_do_cleanup; + vchan_init(&chan->vc, dma_dev); + } + } else { + chan = pt->pt_dma_chan; + chan->pt = pt; + chan->vc.desc_free = pt_do_cleanup; + vchan_init(&chan->vc, dma_dev); + } ret = dma_async_device_register(dma_dev); if (ret) @@ -399,6 +576,7 @@ err_cache: return ret; } +EXPORT_SYMBOL_GPL(pt_dmaengine_register); void pt_dmaengine_unregister(struct pt_device *pt) { diff --git a/drivers/dma/ptdma/ptdma-pci.c b/drivers/dma/amd/ptdma/ptdma-pci.c index 22739ff0c3c5..22739ff0c3c5 100644 --- a/drivers/dma/ptdma/ptdma-pci.c +++ b/drivers/dma/amd/ptdma/ptdma-pci.c diff --git a/drivers/dma/ptdma/ptdma.h b/drivers/dma/amd/ptdma/ptdma.h index 39bc37268235..0a7939105e51 100644 --- a/drivers/dma/ptdma/ptdma.h +++ 
b/drivers/dma/amd/ptdma/ptdma.h @@ -22,7 +22,7 @@ #include <linux/wait.h> #include <linux/dmapool.h> -#include "../virt-dma.h" +#include "../../virt-dma.h" #define MAX_PT_NAME_LEN 16 #define MAX_DMAPOOL_NAME_LEN 32 @@ -184,6 +184,7 @@ struct pt_dma_desc { struct pt_dma_chan { struct virt_dma_chan vc; struct pt_device *pt; + u32 id; }; struct pt_cmd_queue { @@ -262,6 +263,7 @@ struct pt_device { unsigned long total_interrupts; struct pt_tasklet_data tdata; + int ver; }; /* diff --git a/drivers/dma/amd/qdma/qdma.c b/drivers/dma/amd/qdma/qdma.c index 66f00ad67351..8fb2d5e1df20 100644 --- a/drivers/dma/amd/qdma/qdma.c +++ b/drivers/dma/amd/qdma/qdma.c @@ -283,16 +283,20 @@ static int qdma_check_queue_status(struct qdma_device *qdev, static int qdma_clear_queue_context(const struct qdma_queue *queue) { - enum qdma_ctxt_type h2c_types[] = { QDMA_CTXT_DESC_SW_H2C, - QDMA_CTXT_DESC_HW_H2C, - QDMA_CTXT_DESC_CR_H2C, - QDMA_CTXT_PFTCH, }; - enum qdma_ctxt_type c2h_types[] = { QDMA_CTXT_DESC_SW_C2H, - QDMA_CTXT_DESC_HW_C2H, - QDMA_CTXT_DESC_CR_C2H, - QDMA_CTXT_PFTCH, }; + static const enum qdma_ctxt_type h2c_types[] = { + QDMA_CTXT_DESC_SW_H2C, + QDMA_CTXT_DESC_HW_H2C, + QDMA_CTXT_DESC_CR_H2C, + QDMA_CTXT_PFTCH, + }; + static const enum qdma_ctxt_type c2h_types[] = { + QDMA_CTXT_DESC_SW_C2H, + QDMA_CTXT_DESC_HW_C2H, + QDMA_CTXT_DESC_CR_C2H, + QDMA_CTXT_PFTCH, + }; struct qdma_device *qdev = queue->qdev; - enum qdma_ctxt_type *type; + const enum qdma_ctxt_type *type; int ret, num, i; if (queue->dir == DMA_MEM_TO_DEV) { diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 7ba52dee40a9..20b10c15c696 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -875,6 +875,27 @@ static struct dma_chan *bcm2835_dma_xlate(struct of_phandle_args *spec, return chan; } +static int bcm2835_dma_suspend_late(struct device *dev) +{ + struct bcm2835_dmadev *od = dev_get_drvdata(dev); + struct bcm2835_chan *c, *next; + + list_for_each_entry_safe(c, next, &od->ddev.channels, + vc.chan.device_node) { + void __iomem *chan_base = c->chan_base; + + /* Check if DMA channel is busy */ + if (readl(chan_base + BCM2835_DMA_ADDR)) + return -EBUSY; + } + + return 0; +} + +static const struct dev_pm_ops bcm2835_dma_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(bcm2835_dma_suspend_late, NULL) +}; + static int bcm2835_dma_probe(struct platform_device *pdev) { struct bcm2835_dmadev *od; @@ -1033,6 +1054,7 @@ static struct platform_driver bcm2835_dma_driver = { .driver = { .name = "bcm2835-dma", .of_match_table = of_match_ptr(bcm2835_dma_of_match), + .pm = pm_ptr(&bcm2835_dma_pm_ops), }, }; diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c index b7f15ab96855..443b2430466c 100644 --- a/drivers/dma/fsl-edma-common.c +++ b/drivers/dma/fsl-edma-common.c @@ -480,8 +480,8 @@ void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan, bool disable_req, bool enable_sg) { struct dma_slave_config *cfg = &fsl_chan->cfg; + u32 burst = 0; u16 csr = 0; - u32 burst; /* * eDMA hardware SGs require the TCDs to be stored in little @@ -496,16 +496,30 @@ void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan, fsl_edma_set_tcd_to_le(fsl_chan, tcd, soff, soff); - if (fsl_chan->is_multi_fifo) { - /* set mloff to support multiple fifo */ - burst = cfg->direction == DMA_DEV_TO_MEM ? 
- cfg->src_maxburst : cfg->dst_maxburst; - nbytes |= EDMA_V3_TCD_NBYTES_MLOFF(-(burst * 4)); - /* enable DMLOE/SMLOE */ - if (cfg->direction == DMA_MEM_TO_DEV) { + /* If we expect to have either multi_fifo or a port window size, + * we will use minor loop offset, meaning bits 29-10 will be used for + * address offset, while bits 9-0 will be used to tell DMA how much + * data to read from addr. + * If we don't have either of those, will use a major loop reading from addr + * nbytes (29bits). + */ + if (cfg->direction == DMA_MEM_TO_DEV) { + if (fsl_chan->is_multi_fifo) + burst = cfg->dst_maxburst * 4; + if (cfg->dst_port_window_size) + burst = cfg->dst_port_window_size * cfg->dst_addr_width; + if (burst) { + nbytes |= EDMA_V3_TCD_NBYTES_MLOFF(-burst); nbytes |= EDMA_V3_TCD_NBYTES_DMLOE; nbytes &= ~EDMA_V3_TCD_NBYTES_SMLOE; - } else { + } + } else { + if (fsl_chan->is_multi_fifo) + burst = cfg->src_maxburst * 4; + if (cfg->src_port_window_size) + burst = cfg->src_port_window_size * cfg->src_addr_width; + if (burst) { + nbytes |= EDMA_V3_TCD_NBYTES_MLOFF(-burst); nbytes |= EDMA_V3_TCD_NBYTES_SMLOE; nbytes &= ~EDMA_V3_TCD_NBYTES_DMLOE; } @@ -623,11 +637,15 @@ struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic( dst_addr = fsl_chan->dma_dev_addr; soff = fsl_chan->cfg.dst_addr_width; doff = fsl_chan->is_multi_fifo ? 4 : 0; + if (fsl_chan->cfg.dst_port_window_size) + doff = fsl_chan->cfg.dst_addr_width; } else if (direction == DMA_DEV_TO_MEM) { src_addr = fsl_chan->dma_dev_addr; dst_addr = dma_buf_next; soff = fsl_chan->is_multi_fifo ? 4 : 0; doff = fsl_chan->cfg.src_addr_width; + if (fsl_chan->cfg.src_port_window_size) + soff = fsl_chan->cfg.src_addr_width; } else { /* DMA_DEV_TO_DEV */ src_addr = fsl_chan->cfg.src_addr; diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h index fe8f103d4a63..10a5565ddfd7 100644 --- a/drivers/dma/fsl-edma-common.h +++ b/drivers/dma/fsl-edma-common.h @@ -68,6 +68,8 @@ #define EDMA_V3_CH_CSR_EEI BIT(2) #define EDMA_V3_CH_CSR_DONE BIT(30) #define EDMA_V3_CH_CSR_ACTIVE BIT(31) +#define EDMA_V3_CH_ES_ERR BIT(31) +#define EDMA_V3_MP_ES_VLD BIT(31) enum fsl_edma_pm_state { RUNNING = 0, @@ -241,6 +243,7 @@ struct fsl_edma_engine { const struct fsl_edma_drvdata *drvdata; u32 n_chans; int txirq; + int txirq_16_31; int errirq; bool big_endian; struct edma_regs regs; diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 1a613236b3e4..f989b6c9c0a9 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -3,10 +3,11 @@ * drivers/dma/fsl-edma.c * * Copyright 2013-2014 Freescale Semiconductor, Inc. + * Copyright 2024 NXP * * Driver for the Freescale eDMA engine with flexible channel multiplexing * capability for DMA request sources. The eDMA block can be found on some - * Vybrid and Layerscape SoCs. + * Vybrid, Layerscape and S32G SoCs. 
*/ #include <dt-bindings/dma/fsl-edma.h> @@ -72,6 +73,60 @@ static irqreturn_t fsl_edma2_tx_handler(int irq, void *devi_id) return fsl_edma_tx_handler(irq, fsl_chan->edma); } +static irqreturn_t fsl_edma3_or_tx_handler(int irq, void *dev_id, + u8 start, u8 end) +{ + struct fsl_edma_engine *fsl_edma = dev_id; + struct fsl_edma_chan *chan; + int i; + + end = min(end, fsl_edma->n_chans); + + for (i = start; i < end; i++) { + chan = &fsl_edma->chans[i]; + + fsl_edma3_tx_handler(irq, chan); + } + + return IRQ_HANDLED; +} + +static irqreturn_t fsl_edma3_tx_0_15_handler(int irq, void *dev_id) +{ + return fsl_edma3_or_tx_handler(irq, dev_id, 0, 16); +} + +static irqreturn_t fsl_edma3_tx_16_31_handler(int irq, void *dev_id) +{ + return fsl_edma3_or_tx_handler(irq, dev_id, 16, 32); +} + +static irqreturn_t fsl_edma3_or_err_handler(int irq, void *dev_id) +{ + struct fsl_edma_engine *fsl_edma = dev_id; + struct edma_regs *regs = &fsl_edma->regs; + unsigned int err, ch, ch_es; + struct fsl_edma_chan *chan; + + err = edma_readl(fsl_edma, regs->es); + if (!(err & EDMA_V3_MP_ES_VLD)) + return IRQ_NONE; + + for (ch = 0; ch < fsl_edma->n_chans; ch++) { + chan = &fsl_edma->chans[ch]; + + ch_es = edma_readl_chreg(chan, ch_es); + if (!(ch_es & EDMA_V3_CH_ES_ERR)) + continue; + + edma_writel_chreg(chan, EDMA_V3_CH_ES_ERR, ch_es); + fsl_edma_disable_request(chan); + fsl_edma->chans[ch].status = DMA_ERROR; + } + + return IRQ_HANDLED; +} + static irqreturn_t fsl_edma_err_handler(int irq, void *dev_id) { struct fsl_edma_engine *fsl_edma = dev_id; @@ -274,6 +329,49 @@ static int fsl_edma3_irq_init(struct platform_device *pdev, struct fsl_edma_engi return 0; } +static int fsl_edma3_or_irq_init(struct platform_device *pdev, + struct fsl_edma_engine *fsl_edma) +{ + int ret; + + fsl_edma->txirq = platform_get_irq_byname(pdev, "tx-0-15"); + if (fsl_edma->txirq < 0) + return fsl_edma->txirq; + + fsl_edma->txirq_16_31 = platform_get_irq_byname(pdev, "tx-16-31"); + if (fsl_edma->txirq_16_31 < 0) + return fsl_edma->txirq_16_31; + + fsl_edma->errirq = platform_get_irq_byname(pdev, "err"); + if (fsl_edma->errirq < 0) + return fsl_edma->errirq; + + ret = devm_request_irq(&pdev->dev, fsl_edma->txirq, + fsl_edma3_tx_0_15_handler, 0, "eDMA tx0_15", + fsl_edma); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Can't register eDMA tx0_15 IRQ.\n"); + + if (fsl_edma->n_chans > 16) { + ret = devm_request_irq(&pdev->dev, fsl_edma->txirq_16_31, + fsl_edma3_tx_16_31_handler, 0, + "eDMA tx16_31", fsl_edma); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Can't register eDMA tx16_31 IRQ.\n"); + } + + ret = devm_request_irq(&pdev->dev, fsl_edma->errirq, + fsl_edma3_or_err_handler, 0, "eDMA err", + fsl_edma); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Can't register eDMA err IRQ.\n"); + + return 0; +} + static int fsl_edma2_irq_init(struct platform_device *pdev, struct fsl_edma_engine *fsl_edma) @@ -404,6 +502,14 @@ static struct fsl_edma_drvdata imx95_data5 = { .setup_irq = fsl_edma3_irq_init, }; +static const struct fsl_edma_drvdata s32g2_data = { + .dmamuxs = DMAMUX_NR, + .chreg_space_sz = EDMA_TCD, + .chreg_off = 0x4000, + .flags = FSL_EDMA_DRV_EDMA3 | FSL_EDMA_DRV_MUX_SWAP, + .setup_irq = fsl_edma3_or_irq_init, +}; + static const struct of_device_id fsl_edma_dt_ids[] = { { .compatible = "fsl,vf610-edma", .data = &vf610_data}, { .compatible = "fsl,ls1028a-edma", .data = &ls1028a_data}, @@ -413,6 +519,7 @@ static const struct of_device_id fsl_edma_dt_ids[] = { { .compatible = "fsl,imx93-edma3", .data = &imx93_data3}, 
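/*
 * [Editor's worked example for the minor-loop-offset comment added to
 * fsl_edma_fill_tcd() earlier in this patch; the values are illustrative.]
 * With a destination port window of 4 registers, each 4 bytes wide,
 * burst = 4 * 4 = 16, so the engine rewinds the destination address by 16
 * bytes after every minor loop while bits 9-0 still carry the bytes read
 * per request and the signed offset lives in bits 29-10.
 */
static u32 demo_mloff_nbytes(u32 bytes_per_request)
{
	u32 burst = 4 * 4;			/* dst_port_window_size * dst_addr_width */
	u32 nbytes = bytes_per_request;		/* bits 9-0 */

	nbytes |= EDMA_V3_TCD_NBYTES_MLOFF(-burst);	/* signed offset, bits 29-10 */
	nbytes |= EDMA_V3_TCD_NBYTES_DMLOE;		/* apply offset on the destination side */
	nbytes &= ~EDMA_V3_TCD_NBYTES_SMLOE;

	return nbytes;
}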
{ .compatible = "fsl,imx93-edma4", .data = &imx93_data4}, { .compatible = "fsl,imx95-edma5", .data = &imx95_data5}, + { .compatible = "nxp,s32g2-edma", .data = &s32g2_data}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, fsl_edma_dt_ids); @@ -545,10 +652,6 @@ static int fsl_edma_probe(struct platform_device *pdev) for (i = 0; i < fsl_edma->drvdata->dmamuxs; i++) { char clkname[32]; - /* eDMAv3 mux register move to TCD area if ch_mux exist */ - if (drvdata->flags & FSL_EDMA_DRV_SPLIT_REG) - break; - fsl_edma->muxbase[i] = devm_platform_ioremap_resource(pdev, 1 + i); if (IS_ERR(fsl_edma->muxbase[i])) { @@ -677,7 +780,7 @@ static int fsl_edma_probe(struct platform_device *pdev) } ret = of_dma_controller_register(np, - drvdata->flags & FSL_EDMA_DRV_SPLIT_REG ? fsl_edma3_xlate : fsl_edma_xlate, + drvdata->dmamuxs ? fsl_edma_xlate : fsl_edma3_xlate, fsl_edma); if (ret) { dev_err(&pdev->dev, diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 57f1bf2ab20b..ff94ee892339 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -28,7 +28,6 @@ struct idxd_cdev_context { * global to avoid conflict file names. */ static DEFINE_IDA(file_ida); -static DEFINE_MUTEX(ida_lock); /* * ictx is an array based off of accelerator types. enum idxd_type @@ -123,9 +122,7 @@ static void idxd_file_dev_release(struct device *dev) struct idxd_device *idxd = wq->idxd; int rc; - mutex_lock(&ida_lock); ida_free(&file_ida, ctx->id); - mutex_unlock(&ida_lock); /* Wait for in-flight operations to complete. */ if (wq_shared(wq)) { @@ -284,9 +281,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) } idxd_cdev = wq->idxd_cdev; - mutex_lock(&ida_lock); ctx->id = ida_alloc(&file_ida, GFP_KERNEL); - mutex_unlock(&ida_lock); if (ctx->id < 0) { dev_warn(dev, "ida alloc failure\n"); goto failed_ida; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index d84e21daa991..214b8039439f 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -374,6 +374,17 @@ struct idxd_device { struct dentry *dbgfs_evl_file; bool user_submission_safe; + + struct idxd_saved_states *idxd_saved; +}; + +struct idxd_saved_states { + struct idxd_device saved_idxd; + struct idxd_evl saved_evl; + struct idxd_engine **saved_engines; + struct idxd_wq **saved_wqs; + struct idxd_group **saved_groups; + unsigned long *saved_wq_enable_map; }; static inline unsigned int evl_ent_size(struct idxd_device *idxd) @@ -725,8 +736,6 @@ static inline void idxd_desc_complete(struct idxd_desc *desc, &desc->txd, &status); } -int idxd_register_bus_type(void); -void idxd_unregister_bus_type(void); int idxd_register_devices(struct idxd_device *idxd); void idxd_unregister_devices(struct idxd_device *idxd); void idxd_wqs_quiesce(struct idxd_device *idxd); @@ -742,6 +751,8 @@ void idxd_unmask_error_interrupts(struct idxd_device *idxd); /* device control */ int idxd_device_drv_probe(struct idxd_dev *idxd_dev); +int idxd_pci_probe_alloc(struct idxd_device *idxd, struct pci_dev *pdev, + const struct pci_device_id *id); void idxd_device_drv_remove(struct idxd_dev *idxd_dev); int idxd_drv_enable_wq(struct idxd_wq *wq); void idxd_drv_disable_wq(struct idxd_wq *wq); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 140f8d772bee..b946f78f85e1 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -78,6 +78,8 @@ static struct pci_device_id idxd_pci_tbl[] = { { PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) }, /* IAA on DMR platforms */ { PCI_DEVICE_DATA(INTEL, IAA_DMR, 
&idxd_driver_data[IDXD_TYPE_IAX]) }, + /* IAA PTL platforms */ + { PCI_DEVICE_DATA(INTEL, IAA_PTL, &idxd_driver_data[IDXD_TYPE_IAX]) }, { 0, } }; MODULE_DEVICE_TABLE(pci, idxd_pci_tbl); @@ -723,67 +725,464 @@ static void idxd_cleanup(struct idxd_device *idxd) idxd_disable_sva(idxd->pdev); } -static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +/* + * Attach IDXD device to IDXD driver. + */ +static int idxd_bind(struct device_driver *drv, const char *buf) { - struct device *dev = &pdev->dev; - struct idxd_device *idxd; - struct idxd_driver_data *data = (struct idxd_driver_data *)id->driver_data; + const struct bus_type *bus = drv->bus; + struct device *dev; + int err = -ENODEV; + + dev = bus_find_device_by_name(bus, NULL, buf); + if (dev) + err = device_driver_attach(drv, dev); + + put_device(dev); + + return err; +} + +/* + * Detach IDXD device from driver. + */ +static void idxd_unbind(struct device_driver *drv, const char *buf) +{ + const struct bus_type *bus = drv->bus; + struct device *dev; + + dev = bus_find_device_by_name(bus, NULL, buf); + if (dev && dev->driver == drv) + device_release_driver(dev); + + put_device(dev); +} + +#define idxd_free_saved_configs(saved_configs, count) \ + do { \ + int i; \ + \ + for (i = 0; i < (count); i++) \ + kfree(saved_configs[i]); \ + } while (0) + +static void idxd_free_saved(struct idxd_group **saved_groups, + struct idxd_engine **saved_engines, + struct idxd_wq **saved_wqs, + struct idxd_device *idxd) +{ + if (saved_groups) + idxd_free_saved_configs(saved_groups, idxd->max_groups); + if (saved_engines) + idxd_free_saved_configs(saved_engines, idxd->max_engines); + if (saved_wqs) + idxd_free_saved_configs(saved_wqs, idxd->max_wqs); +} + +/* + * Save IDXD device configurations including engines, groups, wqs etc. + * The saved configurations can be restored when needed. 
+ */ +static int idxd_device_config_save(struct idxd_device *idxd, + struct idxd_saved_states *idxd_saved) +{ + struct device *dev = &idxd->pdev->dev; + int i; + + memcpy(&idxd_saved->saved_idxd, idxd, sizeof(*idxd)); + + if (idxd->evl) { + memcpy(&idxd_saved->saved_evl, idxd->evl, + sizeof(struct idxd_evl)); + } + + struct idxd_group **saved_groups __free(kfree) = + kcalloc_node(idxd->max_groups, + sizeof(struct idxd_group *), + GFP_KERNEL, dev_to_node(dev)); + if (!saved_groups) + return -ENOMEM; + + for (i = 0; i < idxd->max_groups; i++) { + struct idxd_group *saved_group __free(kfree) = + kzalloc_node(sizeof(*saved_group), GFP_KERNEL, + dev_to_node(dev)); + + if (!saved_group) { + /* Free saved groups */ + idxd_free_saved(saved_groups, NULL, NULL, idxd); + + return -ENOMEM; + } + + memcpy(saved_group, idxd->groups[i], sizeof(*saved_group)); + saved_groups[i] = no_free_ptr(saved_group); + } + + struct idxd_engine **saved_engines = + kcalloc_node(idxd->max_engines, + sizeof(struct idxd_engine *), + GFP_KERNEL, dev_to_node(dev)); + if (!saved_engines) { + /* Free saved groups */ + idxd_free_saved(saved_groups, NULL, NULL, idxd); + + return -ENOMEM; + } + for (i = 0; i < idxd->max_engines; i++) { + struct idxd_engine *saved_engine __free(kfree) = + kzalloc_node(sizeof(*saved_engine), GFP_KERNEL, + dev_to_node(dev)); + if (!saved_engine) { + /* Free saved groups and engines */ + idxd_free_saved(saved_groups, saved_engines, NULL, + idxd); + + return -ENOMEM; + } + + memcpy(saved_engine, idxd->engines[i], sizeof(*saved_engine)); + saved_engines[i] = no_free_ptr(saved_engine); + } + + unsigned long *saved_wq_enable_map __free(bitmap) = + bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, + dev_to_node(dev)); + if (!saved_wq_enable_map) { + /* Free saved groups and engines */ + idxd_free_saved(saved_groups, saved_engines, NULL, idxd); + + return -ENOMEM; + } + + bitmap_copy(saved_wq_enable_map, idxd->wq_enable_map, idxd->max_wqs); + + struct idxd_wq **saved_wqs __free(kfree) = + kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *), + GFP_KERNEL, dev_to_node(dev)); + if (!saved_wqs) { + /* Free saved groups and engines */ + idxd_free_saved(saved_groups, saved_engines, NULL, idxd); + + return -ENOMEM; + } + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *saved_wq __free(kfree) = + kzalloc_node(sizeof(*saved_wq), GFP_KERNEL, + dev_to_node(dev)); + struct idxd_wq *wq; + + if (!saved_wq) { + /* Free saved groups, engines, and wqs */ + idxd_free_saved(saved_groups, saved_engines, saved_wqs, + idxd); + + return -ENOMEM; + } + + if (!test_bit(i, saved_wq_enable_map)) + continue; + + wq = idxd->wqs[i]; + mutex_lock(&wq->wq_lock); + memcpy(saved_wq, wq, sizeof(*saved_wq)); + saved_wqs[i] = no_free_ptr(saved_wq); + mutex_unlock(&wq->wq_lock); + } + + /* Save configurations */ + idxd_saved->saved_groups = no_free_ptr(saved_groups); + idxd_saved->saved_engines = no_free_ptr(saved_engines); + idxd_saved->saved_wq_enable_map = no_free_ptr(saved_wq_enable_map); + idxd_saved->saved_wqs = no_free_ptr(saved_wqs); + + return 0; +} + +/* + * Restore IDXD device configurations including engines, groups, wqs etc + * that were saved before. 
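/*
 * [Editor's sketch, not part of the patch.] The save path above leans on the
 * scope-based cleanup helpers from <linux/cleanup.h>: a pointer declared with
 * __free(kfree) is freed automatically on any early return, and no_free_ptr()
 * transfers ownership out of the scope so a successfully saved object is not
 * freed. The minimal shape, with a made-up helper name:
 */
static struct idxd_group *demo_save_group(struct idxd_group *src, int node)
{
	struct idxd_group *copy __free(kfree) =
		kzalloc_node(sizeof(*copy), GFP_KERNEL, node);

	if (!copy)
		return NULL;		/* nothing to clean up */

	memcpy(copy, src, sizeof(*copy));

	return no_free_ptr(copy);	/* caller now owns the allocation */
}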
+ */ +static void idxd_device_config_restore(struct idxd_device *idxd, + struct idxd_saved_states *idxd_saved) +{ + struct idxd_evl *saved_evl = &idxd_saved->saved_evl; + int i; + + idxd->rdbuf_limit = idxd_saved->saved_idxd.rdbuf_limit; + + if (saved_evl) + idxd->evl->size = saved_evl->size; + + for (i = 0; i < idxd->max_groups; i++) { + struct idxd_group *saved_group, *group; + + saved_group = idxd_saved->saved_groups[i]; + group = idxd->groups[i]; + + group->rdbufs_allowed = saved_group->rdbufs_allowed; + group->rdbufs_reserved = saved_group->rdbufs_reserved; + group->tc_a = saved_group->tc_a; + group->tc_b = saved_group->tc_b; + group->use_rdbuf_limit = saved_group->use_rdbuf_limit; + + kfree(saved_group); + } + kfree(idxd_saved->saved_groups); + + for (i = 0; i < idxd->max_engines; i++) { + struct idxd_engine *saved_engine, *engine; + + saved_engine = idxd_saved->saved_engines[i]; + engine = idxd->engines[i]; + + engine->group = saved_engine->group; + + kfree(saved_engine); + } + kfree(idxd_saved->saved_engines); + + bitmap_copy(idxd->wq_enable_map, idxd_saved->saved_wq_enable_map, + idxd->max_wqs); + bitmap_free(idxd_saved->saved_wq_enable_map); + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *saved_wq, *wq; + size_t len; + + if (!test_bit(i, idxd->wq_enable_map)) + continue; + + saved_wq = idxd_saved->saved_wqs[i]; + wq = idxd->wqs[i]; + + mutex_lock(&wq->wq_lock); + + wq->group = saved_wq->group; + wq->flags = saved_wq->flags; + wq->threshold = saved_wq->threshold; + wq->size = saved_wq->size; + wq->priority = saved_wq->priority; + wq->type = saved_wq->type; + len = strlen(saved_wq->name) + 1; + strscpy(wq->name, saved_wq->name, len); + wq->max_xfer_bytes = saved_wq->max_xfer_bytes; + wq->max_batch_size = saved_wq->max_batch_size; + wq->enqcmds_retries = saved_wq->enqcmds_retries; + wq->descs = saved_wq->descs; + wq->idxd_chan = saved_wq->idxd_chan; + len = strlen(saved_wq->driver_name) + 1; + strscpy(wq->driver_name, saved_wq->driver_name, len); + + mutex_unlock(&wq->wq_lock); + + kfree(saved_wq); + } + + kfree(idxd_saved->saved_wqs); +} + +static void idxd_reset_prepare(struct pci_dev *pdev) +{ + struct idxd_device *idxd = pci_get_drvdata(pdev); + struct device *dev = &idxd->pdev->dev; + const char *idxd_name; int rc; - rc = pci_enable_device(pdev); - if (rc) - return rc; + dev = &idxd->pdev->dev; + idxd_name = dev_name(idxd_confdev(idxd)); - dev_dbg(dev, "Alloc IDXD context\n"); - idxd = idxd_alloc(pdev, data); - if (!idxd) { - rc = -ENOMEM; - goto err_idxd_alloc; + struct idxd_saved_states *idxd_saved __free(kfree) = + kzalloc_node(sizeof(*idxd_saved), GFP_KERNEL, + dev_to_node(&pdev->dev)); + if (!idxd_saved) { + dev_err(dev, "HALT: no memory\n"); + + return; } - dev_dbg(dev, "Mapping BARs\n"); - idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0); - if (!idxd->reg_base) { - rc = -ENOMEM; - goto err_iomap; + /* Save IDXD configurations. */ + rc = idxd_device_config_save(idxd, idxd_saved); + if (rc < 0) { + dev_err(dev, "HALT: cannot save %s configs\n", idxd_name); + + return; + } + + idxd->idxd_saved = no_free_ptr(idxd_saved); + + /* Save PCI device state. */ + pci_save_state(idxd->pdev); +} + +static void idxd_reset_done(struct pci_dev *pdev) +{ + struct idxd_device *idxd = pci_get_drvdata(pdev); + const char *idxd_name; + struct device *dev; + int rc, i; + + if (!idxd->idxd_saved) + return; + + dev = &idxd->pdev->dev; + idxd_name = dev_name(idxd_confdev(idxd)); + + /* Restore PCI device state. 
*/ + pci_restore_state(idxd->pdev); + + /* Unbind idxd device from driver. */ + idxd_unbind(&idxd_drv.drv, idxd_name); + + /* + * Probe PCI device without allocating or changing + * idxd software data which keeps the same as before FLR. + */ + idxd_pci_probe_alloc(idxd, NULL, NULL); + + /* Restore IDXD configurations. */ + idxd_device_config_restore(idxd, idxd->idxd_saved); + + /* Re-configure IDXD device if allowed. */ + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { + rc = idxd_device_config(idxd); + if (rc < 0) { + dev_err(dev, "HALT: %s config fails\n", idxd_name); + goto out; + } + } + + /* Bind IDXD device to driver. */ + rc = idxd_bind(&idxd_drv.drv, idxd_name); + if (rc < 0) { + dev_err(dev, "HALT: binding %s to driver fails\n", idxd_name); + goto out; } - dev_dbg(dev, "Set DMA masks\n"); - rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + /* Bind enabled wq in the IDXD device to driver. */ + for (i = 0; i < idxd->max_wqs; i++) { + if (test_bit(i, idxd->wq_enable_map)) { + struct idxd_wq *wq = idxd->wqs[i]; + char wq_name[32]; + + wq->state = IDXD_WQ_DISABLED; + sprintf(wq_name, "wq%d.%d", idxd->id, wq->id); + /* + * Bind to user driver depending on wq type. + * + * Currently only support user type WQ. Will support + * kernel type WQ in the future. + */ + if (wq->type == IDXD_WQT_USER) + rc = idxd_bind(&idxd_user_drv.drv, wq_name); + else + rc = -EINVAL; + if (rc < 0) { + clear_bit(i, idxd->wq_enable_map); + dev_err(dev, + "HALT: unable to re-enable wq %s\n", + dev_name(wq_confdev(wq))); + } + } + } +out: + kfree(idxd->idxd_saved); +} + +static const struct pci_error_handlers idxd_error_handler = { + .reset_prepare = idxd_reset_prepare, + .reset_done = idxd_reset_done, +}; + +/* + * Probe idxd PCI device. + * If idxd is not given, need to allocate idxd and set up its data. + * + * If idxd is given, idxd was allocated and setup already. Just need to + * configure device without re-allocating and re-configuring idxd data. + * This is useful for recovering from FLR. + */ +int idxd_pci_probe_alloc(struct idxd_device *idxd, struct pci_dev *pdev, + const struct pci_device_id *id) +{ + bool alloc_idxd = idxd ? false : true; + struct idxd_driver_data *data; + struct device *dev; + int rc; + + pdev = idxd ? idxd->pdev : pdev; + dev = &pdev->dev; + data = id ? 
(struct idxd_driver_data *)id->driver_data : NULL; + rc = pci_enable_device(pdev); if (rc) - goto err; + return rc; + + if (alloc_idxd) { + dev_dbg(dev, "Alloc IDXD context\n"); + idxd = idxd_alloc(pdev, data); + if (!idxd) { + rc = -ENOMEM; + goto err_idxd_alloc; + } + + dev_dbg(dev, "Mapping BARs\n"); + idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0); + if (!idxd->reg_base) { + rc = -ENOMEM; + goto err_iomap; + } + + dev_dbg(dev, "Set DMA masks\n"); + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rc) + goto err; + } dev_dbg(dev, "Set PCI master\n"); pci_set_master(pdev); pci_set_drvdata(pdev, idxd); - idxd->hw.version = ioread32(idxd->reg_base + IDXD_VER_OFFSET); - rc = idxd_probe(idxd); - if (rc) { - dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n"); - goto err; - } + if (alloc_idxd) { + idxd->hw.version = ioread32(idxd->reg_base + IDXD_VER_OFFSET); + rc = idxd_probe(idxd); + if (rc) { + dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n"); + goto err; + } + + if (data->load_device_defaults) { + rc = data->load_device_defaults(idxd); + if (rc) + dev_warn(dev, "IDXD loading device defaults failed\n"); + } + + rc = idxd_register_devices(idxd); + if (rc) { + dev_err(dev, "IDXD sysfs setup failed\n"); + goto err_dev_register; + } - if (data->load_device_defaults) { - rc = data->load_device_defaults(idxd); + rc = idxd_device_init_debugfs(idxd); if (rc) - dev_warn(dev, "IDXD loading device defaults failed\n"); + dev_warn(dev, "IDXD debugfs failed to setup\n"); } - rc = idxd_register_devices(idxd); - if (rc) { - dev_err(dev, "IDXD sysfs setup failed\n"); - goto err_dev_register; - } + if (!alloc_idxd) { + /* Release interrupts in the IDXD device. */ + idxd_cleanup_interrupts(idxd); - rc = idxd_device_init_debugfs(idxd); - if (rc) - dev_warn(dev, "IDXD debugfs failed to setup\n"); + /* Re-enable interrupts in the IDXD device. */ + rc = idxd_setup_interrupts(idxd); + if (rc) + dev_warn(dev, "IDXD interrupts failed to setup\n"); + } dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n", idxd->hw.version); - idxd->user_submission_safe = data->user_submission_safe; + if (data) + idxd->user_submission_safe = data->user_submission_safe; return 0; @@ -798,6 +1197,11 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; } +static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + return idxd_pci_probe_alloc(NULL, pdev, id); +} + void idxd_wqs_quiesce(struct idxd_device *idxd) { struct idxd_wq *wq; @@ -864,6 +1268,7 @@ static struct pci_driver idxd_pci_driver = { .probe = idxd_pci_probe, .remove = idxd_remove, .shutdown = idxd_shutdown, + .err_handler = &idxd_error_handler, }; static int __init idxd_init_module(void) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index fc049c9c9892..1107db3ce0a3 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -383,15 +383,65 @@ static void process_evl_entries(struct idxd_device *idxd) mutex_unlock(&evl->lock); } +static void idxd_device_flr(struct work_struct *work) +{ + struct idxd_device *idxd = container_of(work, struct idxd_device, work); + int rc; + + /* + * IDXD device requires a Function Level Reset (FLR). + * pci_reset_function() will reset the device with FLR. 
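/*
 * [Editor's sketch, hypothetical driver.] The reset_prepare/reset_done pair
 * used above is the generic way a PCI driver participates in a reset it
 * triggers itself via pci_reset_function(): the PCI core calls reset_prepare
 * before the FLR and reset_done after it. A minimal wiring looks like this;
 * all "demo" names are made up and probe/remove are omitted.
 */
static void demo_reset_prepare(struct pci_dev *pdev)
{
	/* quiesce the device and snapshot any state lost across the FLR */
}

static void demo_reset_done(struct pci_dev *pdev)
{
	/* re-initialize the device and restore the snapshot taken above */
}

static const struct pci_error_handlers demo_err_handler = {
	.reset_prepare	= demo_reset_prepare,
	.reset_done	= demo_reset_done,
};

static struct pci_driver demo_driver = {
	.name		= "demo",
	.err_handler	= &demo_err_handler,
	/* .id_table, .probe and .remove omitted in this sketch */
};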
+ */ + rc = pci_reset_function(idxd->pdev); + if (rc) + dev_err(&idxd->pdev->dev, "FLR failed\n"); +} + +static irqreturn_t idxd_halt(struct idxd_device *idxd) +{ + union gensts_reg gensts; + + gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); + if (gensts.state == IDXD_DEVICE_STATE_HALT) { + idxd->state = IDXD_DEV_HALTED; + if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) { + /* + * If we need a software reset, we will throw the work + * on a system workqueue in order to allow interrupts + * for the device command completions. + */ + INIT_WORK(&idxd->work, idxd_device_reinit); + queue_work(idxd->wq, &idxd->work); + } else if (gensts.reset_type == IDXD_DEVICE_RESET_FLR) { + idxd->state = IDXD_DEV_HALTED; + idxd_mask_error_interrupts(idxd); + dev_dbg(&idxd->pdev->dev, + "idxd halted, doing FLR. After FLR, configs are restored\n"); + INIT_WORK(&idxd->work, idxd_device_flr); + queue_work(idxd->wq, &idxd->work); + + } else { + idxd->state = IDXD_DEV_HALTED; + idxd_wqs_quiesce(idxd); + idxd_wqs_unmap_portal(idxd); + idxd_device_clear_state(idxd); + dev_err(&idxd->pdev->dev, + "idxd halted, need system reset"); + + return -ENXIO; + } + } + + return IRQ_HANDLED; +} + irqreturn_t idxd_misc_thread(int vec, void *data) { struct idxd_irq_entry *irq_entry = data; struct idxd_device *idxd = ie_to_idxd(irq_entry); struct device *dev = &idxd->pdev->dev; - union gensts_reg gensts; u32 val = 0; int i; - bool err = false; u32 cause; cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); @@ -401,7 +451,7 @@ irqreturn_t idxd_misc_thread(int vec, void *data) iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); if (cause & IDXD_INTC_HALT_STATE) - goto halt; + return idxd_halt(idxd); if (cause & IDXD_INTC_ERR) { spin_lock(&idxd->dev_lock); @@ -435,7 +485,6 @@ irqreturn_t idxd_misc_thread(int vec, void *data) for (i = 0; i < 4; i++) dev_warn_ratelimited(dev, "err[%d]: %#16.16llx\n", i, idxd->sw_err.bits[i]); - err = true; } if (cause & IDXD_INTC_INT_HANDLE_REVOKED) { @@ -480,34 +529,6 @@ irqreturn_t idxd_misc_thread(int vec, void *data) dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n", val); - if (!err) - goto out; - -halt: - gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); - if (gensts.state == IDXD_DEVICE_STATE_HALT) { - idxd->state = IDXD_DEV_HALTED; - if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) { - /* - * If we need a software reset, we will throw the work - * on a system workqueue in order to allow interrupts - * for the device command completions. - */ - INIT_WORK(&idxd->work, idxd_device_reinit); - queue_work(idxd->wq, &idxd->work); - } else { - idxd->state = IDXD_DEV_HALTED; - idxd_wqs_quiesce(idxd); - idxd_wqs_unmap_portal(idxd); - idxd_device_clear_state(idxd); - dev_err(&idxd->pdev->dev, - "idxd halted, need %s.\n", - gensts.reset_type == IDXD_DEVICE_RESET_FLR ? 
- "FLR" : "system reset"); - } - } - -out: return IRQ_HANDLED; } diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index c426511f2104..006ba206ab1b 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -9,6 +9,7 @@ #define PCI_DEVICE_ID_INTEL_DSA_GNRD 0x11fb #define PCI_DEVICE_ID_INTEL_DSA_DMR 0x1212 #define PCI_DEVICE_ID_INTEL_IAA_DMR 0x1216 +#define PCI_DEVICE_ID_INTEL_IAA_PTL 0xb02d #define DEVICE_VERSION_1 0x100 #define DEVICE_VERSION_2 0x200 diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index f706eae0e76b..6af493f6ba77 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1979,13 +1979,3 @@ void idxd_unregister_devices(struct idxd_device *idxd) device_unregister(group_confdev(group)); } } - -int idxd_register_bus_type(void) -{ - return bus_register(&dsa_bus_type); -} - -void idxd_unregister_bus_type(void) -{ - bus_unregister(&dsa_bus_type); -} diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 40b76b40bc30..fa6e4646fdc2 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1369,10 +1369,9 @@ static int mv_xor_probe(struct platform_device *pdev) return 0; if (pdev->dev.of_node) { - struct device_node *np; int i = 0; - for_each_child_of_node(pdev->dev.of_node, np) { + for_each_child_of_node_scoped(pdev->dev.of_node, np) { struct mv_xor_chan *chan; dma_cap_mask_t cap_mask; int irq; @@ -1388,7 +1387,6 @@ static int mv_xor_probe(struct platform_device *pdev) irq = irq_of_parse_and_map(np, 0); if (!irq) { ret = -ENODEV; - of_node_put(np); goto err_channel_add; } @@ -1397,7 +1395,6 @@ static int mv_xor_probe(struct platform_device *pdev) if (IS_ERR(chan)) { ret = PTR_ERR(chan); irq_dispose_mapping(irq); - of_node_put(np); goto err_channel_add; } diff --git a/drivers/dma/ptdma/Kconfig b/drivers/dma/ptdma/Kconfig deleted file mode 100644 index b430edd709f9..000000000000 --- a/drivers/dma/ptdma/Kconfig +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config AMD_PTDMA - tristate "AMD PassThru DMA Engine" - depends on X86_64 && PCI - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - help - Enable support for the AMD PTDMA controller. This controller - provides DMA capabilities to perform high bandwidth memory to - memory and IO copy operations. It performs DMA transfer through - queue-based descriptor management. This DMA controller is intended - to be used with AMD Non-Transparent Bridge devices and not for - general purpose peripheral DMA. 
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index bbc3276992bb..c14557efd577 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -59,6 +59,9 @@ struct bam_desc_hw { #define DESC_FLAG_NWD BIT(12) #define DESC_FLAG_CMD BIT(11) +#define BAM_NDP_REVISION_START 0x20 +#define BAM_NDP_REVISION_END 0x27 + struct bam_async_desc { struct virt_dma_desc vd; @@ -398,6 +401,7 @@ struct bam_device { /* dma start transaction tasklet */ struct tasklet_struct task; + u32 bam_revision; }; /** @@ -441,8 +445,10 @@ static void bam_reset(struct bam_device *bdev) writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL)); /* set descriptor threshold, start with 4 bytes */ - writel_relaxed(DEFAULT_CNT_THRSHLD, - bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); + if (in_range(bdev->bam_revision, BAM_NDP_REVISION_START, + BAM_NDP_REVISION_END)) + writel_relaxed(DEFAULT_CNT_THRSHLD, + bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); /* Enable default set of h/w workarounds, ie all except BAM_FULL_PIPE */ writel_relaxed(BAM_CNFG_BITS_DEFAULT, bam_addr(bdev, 0, BAM_CNFG_BITS)); @@ -1000,9 +1006,10 @@ static void bam_apply_new_config(struct bam_chan *bchan, maxburst = bchan->slave.src_maxburst; else maxburst = bchan->slave.dst_maxburst; - - writel_relaxed(maxburst, - bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); + if (in_range(bdev->bam_revision, BAM_NDP_REVISION_START, + BAM_NDP_REVISION_END)) + writel_relaxed(maxburst, + bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); } bchan->reconfigure = 0; @@ -1192,10 +1199,11 @@ static int bam_init(struct bam_device *bdev) u32 val; /* read revision and configuration information */ - if (!bdev->num_ees) { - val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)); + val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)); + if (!bdev->num_ees) bdev->num_ees = (val >> NUM_EES_SHIFT) & NUM_EES_MASK; - } + + bdev->bam_revision = val & REVISION_MASK; /* check that configured EE is within range */ if (bdev->ee >= bdev->num_ees) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 52a7c8f2498f..b1f0001cc99c 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -18,6 +18,7 @@ #include "../virt-dma.h" #define TRE_TYPE_DMA 0x10 +#define TRE_TYPE_IMMEDIATE_DMA 0x11 #define TRE_TYPE_GO 0x20 #define TRE_TYPE_CONFIG0 0x22 @@ -64,6 +65,7 @@ /* DMA TRE */ #define TRE_DMA_LEN GENMASK(23, 0) +#define TRE_DMA_IMMEDIATE_LEN GENMASK(3, 0) /* Register offsets from gpi-top */ #define GPII_n_CH_k_CNTXT_0_OFFS(n, k) (0x20000 + (0x4000 * (n)) + (0x80 * (k))) @@ -1711,6 +1713,7 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, dma_addr_t address; struct gpi_tre *tre; unsigned int i; + int len; /* first create config tre if applicable */ if (direction == DMA_MEM_TO_DEV && spi->set_config) { @@ -1763,14 +1766,30 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, tre_idx++; address = sg_dma_address(sgl); - tre->dword[0] = lower_32_bits(address); - tre->dword[1] = upper_32_bits(address); + len = sg_dma_len(sgl); - tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN); + /* Support Immediate dma for write transfers for data length up to 8 bytes */ + if (direction == DMA_MEM_TO_DEV && len <= 2 * sizeof(tre->dword[0])) { + /* + * For Immediate dma, data length may not always be length of 8 bytes, + * it can be length less than 8, hence initialize both dword's with 0 + */ + tre->dword[0] = 0; + tre->dword[1] = 0; + memcpy(&tre->dword[0], sg_virt(sgl), len); - tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); - 
if (direction == DMA_MEM_TO_DEV) - tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); + tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN); + tre->dword[3] = u32_encode_bits(TRE_TYPE_IMMEDIATE_DMA, TRE_FLAGS_TYPE); + } else { + tre->dword[0] = lower_32_bits(address); + tre->dword[1] = upper_32_bits(address); + + tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN); + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); + } + + tre->dword[3] |= u32_encode_bits(direction == DMA_MEM_TO_DEV, + TRE_FLAGS_IEOT); for (i = 0; i < tre_idx; i++) dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0], diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 2679c1f09faf..0c45ce8c74aa 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -2023,6 +2023,10 @@ static const struct of_device_id rcar_dmac_of_ids[] = { .compatible = "renesas,rcar-gen4-dmac", .data = &rcar_gen4_dmac_data, }, { + /* + * Backward compatibility for between v5.12 - v5.19 + * which didn't combined with "renesas,rcar-gen4-dmac" + */ .compatible = "renesas,dmac-r8a779a0", .data = &rcar_gen4_dmac_data, }, diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c index f37cdf6f2179..24796aaaddfa 100644 --- a/drivers/dma/sun4i-dma.c +++ b/drivers/dma/sun4i-dma.c @@ -13,7 +13,9 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/of_dma.h> +#include <linux/of_device.h> #include <linux/platform_device.h> +#include <linux/reset.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -31,12 +33,21 @@ #define SUN4I_DMA_CFG_SRC_ADDR_MODE(mode) ((mode) << 5) #define SUN4I_DMA_CFG_SRC_DRQ_TYPE(type) (type) +#define SUNIV_DMA_CFG_DST_DATA_WIDTH(width) ((width) << 24) +#define SUNIV_DMA_CFG_SRC_DATA_WIDTH(width) ((width) << 8) + +#define SUN4I_MAX_BURST 8 +#define SUNIV_MAX_BURST 4 + /** Normal DMA register values **/ /* Normal DMA source/destination data request type values */ #define SUN4I_NDMA_DRQ_TYPE_SDRAM 0x16 #define SUN4I_NDMA_DRQ_TYPE_LIMIT (0x1F + 1) +#define SUNIV_NDMA_DRQ_TYPE_SDRAM 0x11 +#define SUNIV_NDMA_DRQ_TYPE_LIMIT (0x17 + 1) + /** Normal DMA register layout **/ /* Dedicated DMA source/destination address mode values */ @@ -50,6 +61,9 @@ #define SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15) #define SUN4I_NDMA_CFG_SRC_NON_SECURE BIT(6) +#define SUNIV_NDMA_CFG_CONT_MODE BIT(29) +#define SUNIV_NDMA_CFG_WAIT_STATE(n) ((n) << 26) + /** Dedicated DMA register values **/ /* Dedicated DMA source/destination address mode values */ @@ -62,6 +76,9 @@ #define SUN4I_DDMA_DRQ_TYPE_SDRAM 0x1 #define SUN4I_DDMA_DRQ_TYPE_LIMIT (0x1F + 1) +#define SUNIV_DDMA_DRQ_TYPE_SDRAM 0x1 +#define SUNIV_DDMA_DRQ_TYPE_LIMIT (0x9 + 1) + /** Dedicated DMA register layout **/ /* Dedicated DMA configuration register layout */ @@ -115,6 +132,11 @@ #define SUN4I_DMA_NR_MAX_VCHANS \ (SUN4I_NDMA_NR_MAX_VCHANS + SUN4I_DDMA_NR_MAX_VCHANS) +#define SUNIV_NDMA_NR_MAX_CHANNELS 4 +#define SUNIV_DDMA_NR_MAX_CHANNELS 4 +#define SUNIV_NDMA_NR_MAX_VCHANS (24 * 2 - 1) +#define SUNIV_DDMA_NR_MAX_VCHANS 10 + /* This set of SUN4I_DDMA timing parameters were found experimentally while * working with the SPI driver and seem to make it behave correctly */ #define SUN4I_DDMA_MAGIC_SPI_PARAMETERS \ @@ -132,6 +154,33 @@ #define SUN4I_DDMA_MAX_SEG_SIZE SZ_16M #define SUN4I_DMA_MAX_SEG_SIZE SUN4I_NDMA_MAX_SEG_SIZE +/* + * Hardware channels / ports representation + * + * The hardware is used in several SoCs, with differing numbers + * of channels and endpoints. 
This structure ties those numbers + * to a certain compatible string. + */ +struct sun4i_dma_config { + u32 ndma_nr_max_channels; + u32 ndma_nr_max_vchans; + + u32 ddma_nr_max_channels; + u32 ddma_nr_max_vchans; + + u32 dma_nr_max_channels; + + void (*set_dst_data_width)(u32 *p_cfg, s8 data_width); + void (*set_src_data_width)(u32 *p_cfg, s8 data_width); + int (*convert_burst)(u32 maxburst); + + u8 ndma_drq_sdram; + u8 ddma_drq_sdram; + + u8 max_burst; + bool has_reset; +}; + struct sun4i_dma_pchan { /* Register base of channel */ void __iomem *base; @@ -170,7 +219,7 @@ struct sun4i_dma_contract { }; struct sun4i_dma_dev { - DECLARE_BITMAP(pchans_used, SUN4I_DMA_NR_MAX_CHANNELS); + unsigned long *pchans_used; struct dma_device slave; struct sun4i_dma_pchan *pchans; struct sun4i_dma_vchan *vchans; @@ -178,6 +227,8 @@ struct sun4i_dma_dev { struct clk *clk; int irq; spinlock_t lock; + const struct sun4i_dma_config *cfg; + struct reset_control *rst; }; static struct sun4i_dma_dev *to_sun4i_dma_dev(struct dma_device *dev) @@ -200,7 +251,27 @@ static struct device *chan2dev(struct dma_chan *chan) return &chan->dev->device; } -static int convert_burst(u32 maxburst) +static void set_dst_data_width_a10(u32 *p_cfg, s8 data_width) +{ + *p_cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(data_width); +} + +static void set_src_data_width_a10(u32 *p_cfg, s8 data_width) +{ + *p_cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(data_width); +} + +static void set_dst_data_width_f1c100s(u32 *p_cfg, s8 data_width) +{ + *p_cfg |= SUNIV_DMA_CFG_DST_DATA_WIDTH(data_width); +} + +static void set_src_data_width_f1c100s(u32 *p_cfg, s8 data_width) +{ + *p_cfg |= SUNIV_DMA_CFG_SRC_DATA_WIDTH(data_width); +} + +static int convert_burst_a10(u32 maxburst) { if (maxburst > 8) return -EINVAL; @@ -209,6 +280,15 @@ static int convert_burst(u32 maxburst) return (maxburst >> 2); } +static int convert_burst_f1c100s(u32 maxburst) +{ + if (maxburst > 4) + return -EINVAL; + + /* 1 -> 0, 4 -> 1 */ + return (maxburst >> 2); +} + static int convert_buswidth(enum dma_slave_buswidth addr_width) { if (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES) @@ -233,15 +313,15 @@ static struct sun4i_dma_pchan *find_and_use_pchan(struct sun4i_dma_dev *priv, int i, max; /* - * pchans 0-SUN4I_NDMA_NR_MAX_CHANNELS are normal, and - * SUN4I_NDMA_NR_MAX_CHANNELS+ are dedicated ones + * pchans 0-priv->cfg->ndma_nr_max_channels are normal, and + * priv->cfg->ndma_nr_max_channels+ are dedicated ones */ if (vchan->is_dedicated) { - i = SUN4I_NDMA_NR_MAX_CHANNELS; - max = SUN4I_DMA_NR_MAX_CHANNELS; + i = priv->cfg->ndma_nr_max_channels; + max = priv->cfg->dma_nr_max_channels; } else { i = 0; - max = SUN4I_NDMA_NR_MAX_CHANNELS; + max = priv->cfg->ndma_nr_max_channels; } spin_lock_irqsave(&priv->lock, flags); @@ -444,6 +524,7 @@ generate_ndma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, size_t len, struct dma_slave_config *sconfig, enum dma_transfer_direction direction) { + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); struct sun4i_dma_promise *promise; int ret; @@ -467,13 +548,13 @@ generate_ndma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, sconfig->src_addr_width, sconfig->dst_addr_width); /* Source burst */ - ret = convert_burst(sconfig->src_maxburst); + ret = priv->cfg->convert_burst(sconfig->src_maxburst); if (ret < 0) goto fail; promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); /* Destination burst */ - ret = convert_burst(sconfig->dst_maxburst); + ret = priv->cfg->convert_burst(sconfig->dst_maxburst); if (ret < 0) goto fail; 
promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); @@ -482,13 +563,13 @@ generate_ndma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, ret = convert_buswidth(sconfig->src_addr_width); if (ret < 0) goto fail; - promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + priv->cfg->set_src_data_width(&promise->cfg, ret); /* Destination bus width */ ret = convert_buswidth(sconfig->dst_addr_width); if (ret < 0) goto fail; - promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + priv->cfg->set_dst_data_width(&promise->cfg, ret); return promise; @@ -510,6 +591,7 @@ static struct sun4i_dma_promise * generate_ddma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, size_t len, struct dma_slave_config *sconfig) { + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); struct sun4i_dma_promise *promise; int ret; @@ -524,13 +606,13 @@ generate_ddma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN; /* Source burst */ - ret = convert_burst(sconfig->src_maxburst); + ret = priv->cfg->convert_burst(sconfig->src_maxburst); if (ret < 0) goto fail; promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); /* Destination burst */ - ret = convert_burst(sconfig->dst_maxburst); + ret = priv->cfg->convert_burst(sconfig->dst_maxburst); if (ret < 0) goto fail; promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); @@ -539,13 +621,13 @@ generate_ddma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, ret = convert_buswidth(sconfig->src_addr_width); if (ret < 0) goto fail; - promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + priv->cfg->set_src_data_width(&promise->cfg, ret); /* Destination bus width */ ret = convert_buswidth(sconfig->dst_addr_width); if (ret < 0) goto fail; - promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + priv->cfg->set_dst_data_width(&promise->cfg, ret); return promise; @@ -622,6 +704,7 @@ static struct dma_async_tx_descriptor * sun4i_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) { + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); struct dma_slave_config *sconfig = &vchan->cfg; struct sun4i_dma_promise *promise; @@ -638,8 +721,8 @@ sun4i_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, */ sconfig->src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; sconfig->dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - sconfig->src_maxburst = 8; - sconfig->dst_maxburst = 8; + sconfig->src_maxburst = priv->cfg->max_burst; + sconfig->dst_maxburst = priv->cfg->max_burst; if (vchan->is_dedicated) promise = generate_ddma_promise(chan, src, dest, len, sconfig); @@ -654,11 +737,13 @@ sun4i_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, /* Configure memcpy mode */ if (vchan->is_dedicated) { - promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM) | - SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM); + promise->cfg |= + SUN4I_DMA_CFG_SRC_DRQ_TYPE(priv->cfg->ddma_drq_sdram) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(priv->cfg->ddma_drq_sdram); } else { - promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) | - SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM); + promise->cfg |= + SUN4I_DMA_CFG_SRC_DRQ_TYPE(priv->cfg->ndma_drq_sdram) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(priv->cfg->ndma_drq_sdram); } /* Fill the contract with our only promise */ @@ -673,6 +758,7 @@ sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len, size_t period_len, 
enum dma_transfer_direction dir, unsigned long flags) { + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); struct dma_slave_config *sconfig = &vchan->cfg; struct sun4i_dma_promise *promise; @@ -696,11 +782,11 @@ sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len, if (vchan->is_dedicated) { io_mode = SUN4I_DDMA_ADDR_MODE_IO; linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR; - ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM; + ram_type = priv->cfg->ddma_drq_sdram; } else { io_mode = SUN4I_NDMA_ADDR_MODE_IO; linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR; - ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM; + ram_type = priv->cfg->ndma_drq_sdram; } if (dir == DMA_MEM_TO_DEV) { @@ -793,6 +879,7 @@ sun4i_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction dir, unsigned long flags, void *context) { + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); struct dma_slave_config *sconfig = &vchan->cfg; struct sun4i_dma_promise *promise; @@ -818,11 +905,11 @@ sun4i_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (vchan->is_dedicated) { io_mode = SUN4I_DDMA_ADDR_MODE_IO; linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR; - ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM; + ram_type = priv->cfg->ddma_drq_sdram; } else { io_mode = SUN4I_NDMA_ADDR_MODE_IO; linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR; - ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM; + ram_type = priv->cfg->ndma_drq_sdram; } if (dir == DMA_MEM_TO_DEV) @@ -1150,6 +1237,10 @@ static int sun4i_dma_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; + priv->cfg = of_device_get_match_data(&pdev->dev); + if (!priv->cfg) + return -ENODEV; + priv->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->base)) return PTR_ERR(priv->base); @@ -1164,6 +1255,13 @@ static int sun4i_dma_probe(struct platform_device *pdev) return PTR_ERR(priv->clk); } + if (priv->cfg->has_reset) { + priv->rst = devm_reset_control_get_exclusive_deasserted(&pdev->dev, NULL); + if (IS_ERR(priv->rst)) + return dev_err_probe(&pdev->dev, PTR_ERR(priv->rst), + "Failed to get reset control\n"); + } + platform_set_drvdata(pdev, priv); spin_lock_init(&priv->lock); @@ -1197,23 +1295,26 @@ static int sun4i_dma_probe(struct platform_device *pdev) priv->slave.dev = &pdev->dev; - priv->pchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_CHANNELS, + priv->pchans = devm_kcalloc(&pdev->dev, priv->cfg->dma_nr_max_channels, sizeof(struct sun4i_dma_pchan), GFP_KERNEL); priv->vchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_VCHANS, sizeof(struct sun4i_dma_vchan), GFP_KERNEL); - if (!priv->vchans || !priv->pchans) + priv->pchans_used = devm_kcalloc(&pdev->dev, + BITS_TO_LONGS(priv->cfg->dma_nr_max_channels), + sizeof(unsigned long), GFP_KERNEL); + if (!priv->vchans || !priv->pchans || !priv->pchans_used) return -ENOMEM; /* - * [0..SUN4I_NDMA_NR_MAX_CHANNELS) are normal pchans, and - * [SUN4I_NDMA_NR_MAX_CHANNELS..SUN4I_DMA_NR_MAX_CHANNELS) are + * [0..priv->cfg->ndma_nr_max_channels) are normal pchans, and + * [priv->cfg->ndma_nr_max_channels..priv->cfg->dma_nr_max_channels) are * dedicated ones */ - for (i = 0; i < SUN4I_NDMA_NR_MAX_CHANNELS; i++) + for (i = 0; i < priv->cfg->ndma_nr_max_channels; i++) priv->pchans[i].base = priv->base + SUN4I_NDMA_CHANNEL_REG_BASE(i); - for (j = 0; i < SUN4I_DMA_NR_MAX_CHANNELS; i++, j++) { + for (j = 0; i < priv->cfg->dma_nr_max_channels; i++, j++) { 
priv->pchans[i].base = priv->base + SUN4I_DDMA_CHANNEL_REG_BASE(j); priv->pchans[i].is_dedicated = 1; @@ -1284,8 +1385,51 @@ static void sun4i_dma_remove(struct platform_device *pdev) clk_disable_unprepare(priv->clk); } +static struct sun4i_dma_config sun4i_a10_dma_cfg = { + .ndma_nr_max_channels = SUN4I_NDMA_NR_MAX_CHANNELS, + .ndma_nr_max_vchans = SUN4I_NDMA_NR_MAX_VCHANS, + + .ddma_nr_max_channels = SUN4I_DDMA_NR_MAX_CHANNELS, + .ddma_nr_max_vchans = SUN4I_DDMA_NR_MAX_VCHANS, + + .dma_nr_max_channels = SUN4I_DMA_NR_MAX_CHANNELS, + + .set_dst_data_width = set_dst_data_width_a10, + .set_src_data_width = set_src_data_width_a10, + .convert_burst = convert_burst_a10, + + .ndma_drq_sdram = SUN4I_NDMA_DRQ_TYPE_SDRAM, + .ddma_drq_sdram = SUN4I_DDMA_DRQ_TYPE_SDRAM, + + .max_burst = SUN4I_MAX_BURST, + .has_reset = false, +}; + +static struct sun4i_dma_config suniv_f1c100s_dma_cfg = { + .ndma_nr_max_channels = SUNIV_NDMA_NR_MAX_CHANNELS, + .ndma_nr_max_vchans = SUNIV_NDMA_NR_MAX_VCHANS, + + .ddma_nr_max_channels = SUNIV_DDMA_NR_MAX_CHANNELS, + .ddma_nr_max_vchans = SUNIV_DDMA_NR_MAX_VCHANS, + + .dma_nr_max_channels = SUNIV_NDMA_NR_MAX_CHANNELS + + SUNIV_DDMA_NR_MAX_CHANNELS, + + .set_dst_data_width = set_dst_data_width_f1c100s, + .set_src_data_width = set_src_data_width_f1c100s, + .convert_burst = convert_burst_f1c100s, + + .ndma_drq_sdram = SUNIV_NDMA_DRQ_TYPE_SDRAM, + .ddma_drq_sdram = SUNIV_DDMA_DRQ_TYPE_SDRAM, + + .max_burst = SUNIV_MAX_BURST, + .has_reset = true, +}; + static const struct of_device_id sun4i_dma_match[] = { - { .compatible = "allwinner,sun4i-a10-dma" }, + { .compatible = "allwinner,sun4i-a10-dma", .data = &sun4i_a10_dma_cfg }, + { .compatible = "allwinner,suniv-f1c100s-dma", + .data = &suniv_f1c100s_dma_cfg }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, sun4i_dma_match); diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 2953008d42ef..6896da8ac7ef 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -43,6 +43,10 @@ #define ADMA_CH_CONFIG_MAX_BUFS 8 #define TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(reqs) (reqs << 4) +#define TEGRA186_ADMA_GLOBAL_PAGE_CHGRP 0x30 +#define TEGRA186_ADMA_GLOBAL_PAGE_RX_REQ 0x70 +#define TEGRA186_ADMA_GLOBAL_PAGE_TX_REQ 0x84 + #define ADMA_CH_FIFO_CTRL 0x2c #define ADMA_CH_TX_FIFO_SIZE_SHIFT 8 #define ADMA_CH_RX_FIFO_SIZE_SHIFT 0 @@ -96,6 +100,7 @@ struct tegra_adma_chip_data { unsigned int ch_fifo_size_mask; unsigned int sreq_index_offset; bool has_outstanding_reqs; + void (*set_global_pg_config)(struct tegra_adma *tdma); }; /* @@ -151,6 +156,7 @@ struct tegra_adma { struct dma_device dma_dev; struct device *dev; void __iomem *base_addr; + void __iomem *ch_base_addr; struct clk *ahub_clk; unsigned int nr_channels; unsigned long *dma_chan_mask; @@ -159,6 +165,7 @@ struct tegra_adma { /* Used to store global command register state when suspending */ unsigned int global_cmd; + unsigned int ch_page_no; const struct tegra_adma_chip_data *cdata; @@ -176,6 +183,11 @@ static inline u32 tdma_read(struct tegra_adma *tdma, u32 reg) return readl(tdma->base_addr + tdma->cdata->global_reg_offset + reg); } +static inline void tdma_ch_global_write(struct tegra_adma *tdma, u32 reg, u32 val) +{ + writel(val, tdma->ch_base_addr + tdma->cdata->global_reg_offset + reg); +} + static inline void tdma_ch_write(struct tegra_adma_chan *tdc, u32 reg, u32 val) { writel(val, tdc->chan_addr + reg); @@ -217,13 +229,30 @@ static int tegra_adma_slave_config(struct dma_chan *dc, return 0; } +static void 
tegra186_adma_global_page_config(struct tegra_adma *tdma) +{ + /* + * Clear the default page1 channel group configs and program + * the global registers based on the actual page usage + */ + tdma_write(tdma, TEGRA186_ADMA_GLOBAL_PAGE_CHGRP, 0); + tdma_write(tdma, TEGRA186_ADMA_GLOBAL_PAGE_RX_REQ, 0); + tdma_write(tdma, TEGRA186_ADMA_GLOBAL_PAGE_TX_REQ, 0); + tdma_write(tdma, TEGRA186_ADMA_GLOBAL_PAGE_CHGRP + (tdma->ch_page_no * 0x4), 0xff); + tdma_write(tdma, TEGRA186_ADMA_GLOBAL_PAGE_RX_REQ + (tdma->ch_page_no * 0x4), 0x1ffffff); + tdma_write(tdma, TEGRA186_ADMA_GLOBAL_PAGE_TX_REQ + (tdma->ch_page_no * 0x4), 0xffffff); +} + static int tegra_adma_init(struct tegra_adma *tdma) { u32 status; int ret; - /* Clear any interrupts */ - tdma_write(tdma, tdma->cdata->ch_base_offset + tdma->cdata->global_int_clear, 0x1); + /* Clear any channels group global interrupts */ + tdma_ch_global_write(tdma, tdma->cdata->global_int_clear, 0x1); + + if (!tdma->base_addr) + return 0; /* Assert soft reset */ tdma_write(tdma, ADMA_GLOBAL_SOFT_RESET, 0x1); @@ -237,6 +266,9 @@ static int tegra_adma_init(struct tegra_adma *tdma) if (ret) return ret; + if (tdma->cdata->set_global_pg_config) + tdma->cdata->set_global_pg_config(tdma); + /* Enable global ADMA registers */ tdma_write(tdma, ADMA_GLOBAL_CMD, 1); @@ -736,7 +768,9 @@ static int __maybe_unused tegra_adma_runtime_suspend(struct device *dev) struct tegra_adma_chan *tdc; int i; - tdma->global_cmd = tdma_read(tdma, ADMA_GLOBAL_CMD); + if (tdma->base_addr) + tdma->global_cmd = tdma_read(tdma, ADMA_GLOBAL_CMD); + if (!tdma->global_cmd) goto clk_disable; @@ -777,7 +811,11 @@ static int __maybe_unused tegra_adma_runtime_resume(struct device *dev) dev_err(dev, "ahub clk_enable failed: %d\n", ret); return ret; } - tdma_write(tdma, ADMA_GLOBAL_CMD, tdma->global_cmd); + if (tdma->base_addr) { + tdma_write(tdma, ADMA_GLOBAL_CMD, tdma->global_cmd); + if (tdma->cdata->set_global_pg_config) + tdma->cdata->set_global_pg_config(tdma); + } if (!tdma->global_cmd) return 0; @@ -817,6 +855,7 @@ static const struct tegra_adma_chip_data tegra210_chip_data = { .ch_fifo_size_mask = 0xf, .sreq_index_offset = 2, .has_outstanding_reqs = false, + .set_global_pg_config = NULL, }; static const struct tegra_adma_chip_data tegra186_chip_data = { @@ -833,6 +872,7 @@ static const struct tegra_adma_chip_data tegra186_chip_data = { .ch_fifo_size_mask = 0x1f, .sreq_index_offset = 4, .has_outstanding_reqs = true, + .set_global_pg_config = tegra186_adma_global_page_config, }; static const struct of_device_id tegra_adma_of_match[] = { @@ -846,7 +886,8 @@ static int tegra_adma_probe(struct platform_device *pdev) { const struct tegra_adma_chip_data *cdata; struct tegra_adma *tdma; - int ret, i; + struct resource *res_page, *res_base; + int ret, i, page_no; cdata = of_device_get_match_data(&pdev->dev); if (!cdata) { @@ -865,9 +906,35 @@ static int tegra_adma_probe(struct platform_device *pdev) tdma->nr_channels = cdata->nr_channels; platform_set_drvdata(pdev, tdma); - tdma->base_addr = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(tdma->base_addr)) - return PTR_ERR(tdma->base_addr); + res_page = platform_get_resource_byname(pdev, IORESOURCE_MEM, "page"); + if (res_page) { + tdma->ch_base_addr = devm_ioremap_resource(&pdev->dev, res_page); + if (IS_ERR(tdma->ch_base_addr)) + return PTR_ERR(tdma->ch_base_addr); + + res_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "global"); + if (res_base) { + page_no = (res_page->start - res_base->start) / cdata->ch_base_offset; + if (page_no <= 0) 
+ return -EINVAL; + tdma->ch_page_no = page_no - 1; + tdma->base_addr = devm_ioremap_resource(&pdev->dev, res_base); + if (IS_ERR(tdma->base_addr)) + return PTR_ERR(tdma->base_addr); + } + } else { + /* If no 'page' property found, then reg DT binding would be legacy */ + res_base = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res_base) { + tdma->base_addr = devm_ioremap_resource(&pdev->dev, res_base); + if (IS_ERR(tdma->base_addr)) + return PTR_ERR(tdma->base_addr); + } else { + return -ENODEV; + } + + tdma->ch_base_addr = tdma->base_addr + cdata->ch_base_offset; + } tdma->ahub_clk = devm_clk_get(&pdev->dev, "d_audio"); if (IS_ERR(tdma->ahub_clk)) { @@ -900,8 +967,7 @@ static int tegra_adma_probe(struct platform_device *pdev) if (!test_bit(i, tdma->dma_chan_mask)) continue; - tdc->chan_addr = tdma->base_addr + cdata->ch_base_offset - + (cdata->ch_reg_size * i); + tdc->chan_addr = tdma->ch_base_addr + (cdata->ch_reg_size * i); tdc->irq = of_irq_get(pdev->dev.of_node, i); if (tdc->irq <= 0) { diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 343e986e66e7..4ece125b2ae7 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -208,7 +208,6 @@ struct edma_desc { struct edma_cc; struct edma_tc { - struct device_node *node; u16 id; }; @@ -2460,19 +2459,19 @@ static int edma_probe(struct platform_device *pdev) goto err_reg1; } - for (i = 0;; i++) { + for (i = 0; i < ecc->num_tc; i++) { ret = of_parse_phandle_with_fixed_args(node, "ti,tptcs", 1, i, &tc_args); - if (ret || i == ecc->num_tc) + if (ret) break; - ecc->tc_list[i].node = tc_args.np; ecc->tc_list[i].id = i; queue_priority_mapping[i][1] = tc_args.args[0]; if (queue_priority_mapping[i][1] > lowest_priority) { lowest_priority = queue_priority_mapping[i][1]; info->default_queue = i; } + of_node_put(tc_args.np); } /* See if we have optional dma-channel-mask array */ diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index b3f27b3f9209..7ed1956b4642 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4404,6 +4404,18 @@ static struct udma_match_data j721s2_bcdma_csi_data = { .soc_data = &j721s2_bcdma_csi_soc_data, }; +static struct udma_match_data j722s_bcdma_csi_data = { + .type = DMA_TYPE_BCDMA, + .psil_base = 0x3100, + .enable_memcpy_support = false, + .burst_size = { + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */ + 0, /* No H Channels */ + 0, /* No UH Channels */ + }, + .soc_data = &j721s2_bcdma_csi_soc_data, +}; + static const struct of_device_id udma_of_match[] = { { .compatible = "ti,am654-navss-main-udmap", @@ -4435,6 +4447,10 @@ static const struct of_device_id udma_of_match[] = { .compatible = "ti,j721s2-dmss-bcdma-csi", .data = &j721s2_bcdma_csi_data, }, + { + .compatible = "ti,j722s-dmss-bcdma-csi", + .data = &j722s_bcdma_csi_data, + }, { /* Sentinel */ }, }; MODULE_DEVICE_TABLE(of, udma_of_match); diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index 93772abc3b49..0d88b1a670e1 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -390,15 +390,11 @@ static int xdma_xfer_start(struct xdma_chan *xchan) */ static int xdma_xfer_stop(struct xdma_chan *xchan) { - int ret; struct xdma_device *xdev = xchan->xdev_hdl; /* clear run stop bit to prevent any further auto-triggering */ - ret = regmap_write(xdev->rmap, xchan->base + XDMA_CHAN_CONTROL_W1C, - CHAN_CTRL_RUN_STOP); - if (ret) - return ret; - return ret; + return regmap_write(xdev->rmap, xchan->base + XDMA_CHAN_CONTROL_W1C, + CHAN_CTRL_RUN_STOP); } /** diff 
--git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 1bdd57de87a6..108a7287f4cd 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -1404,16 +1404,18 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan) dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); - j = chan->desc_submitcount; - reg = dma_read(chan, XILINX_DMA_REG_PARK_PTR); - if (chan->direction == DMA_MEM_TO_DEV) { - reg &= ~XILINX_DMA_PARK_PTR_RD_REF_MASK; - reg |= j << XILINX_DMA_PARK_PTR_RD_REF_SHIFT; - } else { - reg &= ~XILINX_DMA_PARK_PTR_WR_REF_MASK; - reg |= j << XILINX_DMA_PARK_PTR_WR_REF_SHIFT; + if (config->park) { + j = chan->desc_submitcount; + reg = dma_read(chan, XILINX_DMA_REG_PARK_PTR); + if (chan->direction == DMA_MEM_TO_DEV) { + reg &= ~XILINX_DMA_PARK_PTR_RD_REF_MASK; + reg |= j << XILINX_DMA_PARK_PTR_RD_REF_SHIFT; + } else { + reg &= ~XILINX_DMA_PARK_PTR_WR_REF_MASK; + reg |= j << XILINX_DMA_PARK_PTR_WR_REF_SHIFT; + } + dma_write(chan, XILINX_DMA_REG_PARK_PTR, reg); } - dma_write(chan, XILINX_DMA_REG_PARK_PTR, reg); /* Start the hardware */ xilinx_dma_start(chan); diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index a99fe35f1f0d..ec3e21ad2025 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -988,7 +988,7 @@ int fw_device_set_broadcast_channel(struct device *dev, void *gen) return 0; } -static int compare_configuration_rom(struct device *dev, void *data) +static int compare_configuration_rom(struct device *dev, const void *data) { const struct fw_device *old = fw_device(dev); const u32 *config_rom = data; @@ -1039,7 +1039,7 @@ static void fw_device_init(struct work_struct *work) // // serialize config_rom access. scoped_guard(rwsem_read, &fw_device_rwsem) { - found = device_find_child(card->device, (void *)device->config_rom, + found = device_find_child(card->device, device->config_rom, compare_configuration_rom); } if (found) { diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 157172a5f2b5..a3386bf36de5 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -238,10 +238,10 @@ static int scmi_dev_match(struct device *dev, const struct device_driver *drv) return 0; } -static int scmi_match_by_id_table(struct device *dev, void *data) +static int scmi_match_by_id_table(struct device *dev, const void *data) { struct scmi_device *sdev = to_scmi_dev(dev); - struct scmi_device_id *id_table = data; + const struct scmi_device_id *id_table = data; return sdev->protocol_id == id_table->protocol_id && (id_table->name && !strcmp(sdev->name, id_table->name)); diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c index 9e89a6a763da..7cc0d616b8de 100644 --- a/drivers/firmware/arm_scmi/raw_mode.c +++ b/drivers/firmware/arm_scmi/raw_mode.c @@ -886,10 +886,8 @@ static __poll_t scmi_dbg_raw_mode_message_poll(struct file *filp, static int scmi_dbg_raw_mode_open(struct inode *inode, struct file *filp) { - u8 id; struct scmi_raw_mode_info *raw; struct scmi_dbg_raw_data *rd; - const char *id_str = filp->f_path.dentry->d_parent->d_name.name; if (!inode->i_private) return -ENODEV; @@ -915,8 +913,8 @@ static int scmi_dbg_raw_mode_open(struct inode *inode, struct file *filp) } /* Grab channel ID from debugfs entry naming if any */ - if (!kstrtou8(id_str, 16, &id)) - rd->chan_id = id; + /* not set - reassing 0 we already had after kzalloc() */ + rd->chan_id = debugfs_get_aux_num(filp); rd->raw = raw; 
filp->private_data = rd; @@ -1225,10 +1223,12 @@ void *scmi_raw_mode_init(const struct scmi_handle *handle, snprintf(cdir, 8, "0x%02X", channels[i]); chd = debugfs_create_dir(cdir, top_chans); - debugfs_create_file("message", 0600, chd, raw, + debugfs_create_file_aux_num("message", 0600, chd, + raw, channels[i], &scmi_dbg_raw_mode_message_fops); - debugfs_create_file("message_async", 0600, chd, raw, + debugfs_create_file_aux_num("message_async", 0600, chd, + raw, channels[i], &scmi_dbg_raw_mode_message_async_fops); } } diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig index ee09269c63b5..0a883091259a 100644 --- a/drivers/firmware/cirrus/Kconfig +++ b/drivers/firmware/cirrus/Kconfig @@ -6,15 +6,13 @@ config FW_CS_DSP config FW_CS_DSP_KUNIT_TEST_UTILS tristate - depends on KUNIT - select REGMAP + depends on KUNIT && REGMAP select FW_CS_DSP config FW_CS_DSP_KUNIT_TEST tristate "KUnit tests for Cirrus Logic cs_dsp" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && REGMAP default KUNIT_ALL_TESTS - select REGMAP select FW_CS_DSP select FW_CS_DSP_KUNIT_TEST_UTILS help diff --git a/drivers/firmware/efi/dev-path-parser.c b/drivers/firmware/efi/dev-path-parser.c index 937be269fee8..13ea141c0def 100644 --- a/drivers/firmware/efi/dev-path-parser.c +++ b/drivers/firmware/efi/dev-path-parser.c @@ -47,9 +47,9 @@ static long __init parse_acpi_path(const struct efi_dev_path *node, return 0; } -static int __init match_pci_dev(struct device *dev, void *data) +static int __init match_pci_dev(struct device *dev, const void *data) { - unsigned int devfn = *(unsigned int *)data; + unsigned int devfn = *(const unsigned int *)data; return dev_is_pci(dev) && to_pci_dev(dev)->devfn == devfn; } diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index 4cb455b2bdee..619b6fb9d833 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -490,8 +490,7 @@ static int mxc_gpio_probe(struct platform_device *pdev) port->gc.request = mxc_gpio_request; port->gc.free = mxc_gpio_free; port->gc.to_irq = mxc_gpio_to_irq; - port->gc.base = (pdev->id < 0) ? 
of_alias_get_id(np, "gpio") * 32 : - pdev->id * 32; + port->gc.base = of_alias_get_id(np, "gpio") * 32; err = devm_gpiochip_add_data(&pdev->dev, &port->gc, port); if (err) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 686ae3d11ba3..a086087ada17 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -413,11 +413,6 @@ static int gpio_sim_setup_sysfs(struct gpio_sim_chip *chip) return devm_add_action_or_reset(dev, gpio_sim_sysfs_remove, chip); } -static int gpio_sim_dev_match_fwnode(struct device *dev, void *data) -{ - return device_match_fwnode(dev, data); -} - static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev) { struct gpio_sim_chip *chip; @@ -503,7 +498,7 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev) if (ret) return ret; - chip->dev = device_find_child(dev, swnode, gpio_sim_dev_match_fwnode); + chip->dev = device_find_child(dev, swnode, device_match_fwnode); if (!chip->dev) return -ENODEV; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index c1772f44b1d7..2523221a2519 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -4021,17 +4021,6 @@ static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade if (def != data) WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); - - data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); - data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - - /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ - if (adev->sdma.num_instances > 1) { - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); - } } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 1405e8affd48..d4593374e7a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2325,9 +2325,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, */ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { + while (halt_if_hws_hang) + schedule(); if (reset_queues_on_hws_hang(dqm)) { - while (halt_if_hws_hang) - schedule(); dqm->is_hws_hang = true; kfd_hws_hang(dqm); retval = -ETIME; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 9df56f8e09f9..bcddd989c7f3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -86,9 +86,12 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) if (pdd->already_dequeued) return; - + /* The MES context flush needs to filter out the case which the + * KFD process is created without setting up the MES context and + * queue for creating a compute queue. 
+ */ dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); - if (dev->kfd->shared_resources.enable_mes && + if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr && down_read_trylock(&dev->adev->reset_domain->sem)) { amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b0e66c05d811..ac3fd81fecef 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12326,10 +12326,14 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || sink->sink_signal == SIGNAL_TYPE_EDP)) { - amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; - amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; - if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) - freesync_capable = true; + if (amdgpu_dm_connector->dc_link && + amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) { + amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; + amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; + if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) + freesync_capable = true; + } + parse_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); if (vsdb_info.replay_mode) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 1f974ea3b0c6..1648226586e2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -89,7 +89,7 @@ #define mmCLK1_CLK4_ALLOW_DS 0x16EA8 #define mmCLK1_CLK5_ALLOW_DS 0x16EB1 -#define mmCLK5_spll_field_8 0x1B04B +#define mmCLK5_spll_field_8 0x1B24B #define mmDENTIST_DISPCLK_CNTL 0x0124 #define regDENTIST_DISPCLK_CNTL 0x0064 #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index f6b029354327..83163d7c7f00 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1732,7 +1732,6 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity; gpu_metrics->average_umc_activity = metrics.AverageUclkActivity; - gpu_metrics->average_mm_activity = 0; /* Valid power data is available only from primary die */ if (aldebaran_is_primary(smu)) { diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2406cda75b7b..5384d1bb4923 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4802,7 +4802,7 @@ err_unlock: return ret; } -static struct ctl_table oa_table[] = { +static const struct ctl_table oa_table[] = { { .procname = "perf_stream_paranoid", .data = &i915_perf_stream_paranoid, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index cd25e5afe55a..f22ad2882697 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -358,7 +358,7 @@ static const struct of_device_id mtk_drm_of_ids[] = { }; MODULE_DEVICE_TABLE(of, mtk_drm_of_ids); -static int mtk_drm_match(struct device *dev, void *data) +static int 
mtk_drm_match(struct device *dev, const void *data) { if (!strncmp(dev_name(dev), "mediatek-drm", sizeof("mediatek-drm") - 1)) return true; diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index 8ec1b84cbb9e..57cf01efc07f 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -56,7 +56,7 @@ int xe_observation_ioctl(struct drm_device *dev, void *data, struct drm_file *fi } } -static struct ctl_table observation_ctl_table[] = { +static const struct ctl_table observation_ctl_table[] = { { .procname = "observation_paranoid", .data = &xe_observation_paranoid, diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index af5d1dc451f6..f2e6f55d6ca6 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -141,7 +141,7 @@ static int sysctl_record_panic_msg = 1; * sysctl option to allow the user to control whether kmsg data should be * reported to Hyper-V on panic. */ -static struct ctl_table hv_ctl_table[] = { +static const struct ctl_table hv_ctl_table[] = { { .procname = "hyperv_record_panic_msg", .data = &sysctl_record_panic_msg, diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index b7c0b1e3c23b..9703d60e9bbf 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -332,7 +332,7 @@ static int hwmon_attr_base(enum hwmon_sensor_types type) static DEFINE_MUTEX(hwmon_pec_mutex); -static int hwmon_match_device(struct device *dev, void *data) +static int hwmon_match_device(struct device *dev, const void *data) { return dev->class == &hwmon_class; } diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index eec95c724b25..fc438f445771 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -756,6 +756,7 @@ config I2C_IMX config I2C_IMX_LPI2C tristate "IMX Low Power I2C interface" depends on ARCH_MXC || COMPILE_TEST + select I2C_SLAVE help Say Y here if you want to use the Low Power IIC bus controller on the Freescale i.MX processors. 
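Several hunks above (firewire core-device, scmi bus, efi dev-path-parser, gpio-sim, hwmon, mgb4, leds-turris-omnia, mtk_drm) convert driver-core match callbacks to take a const void *data, which is what lets gpio-sim pass the generic device_match_fwnode() helper straight to device_find_child(). A minimal sketch of a matcher written against that const-qualified prototype, not from the patch and with hypothetical example_* names:

#include <linux/device.h>
#include <linux/string.h>

/* Read-only matcher: returns nonzero to stop the search. */
static int example_match_by_name(struct device *dev, const void *data)
{
	const char *name = data;

	return !strcmp(dev_name(dev), name);
}

static struct device *example_find_child_by_name(struct device *parent,
						 const char *name)
{
	/*
	 * device_find_child() takes a reference on the device it returns;
	 * the caller drops it with put_device() when finished.
	 */
	return device_find_child(parent, name, example_match_by_name);
}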
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index c24ccefb015e..5546184df05f 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -583,6 +583,9 @@ static int i2c_device_probe(struct device *dev) goto err_detach_pm_domain; } + client->debugfs = debugfs_create_dir(dev_name(&client->dev), + client->adapter->debugfs); + if (driver->probe) status = driver->probe(client); else @@ -602,6 +605,7 @@ static int i2c_device_probe(struct device *dev) return 0; err_release_driver_resources: + debugfs_remove_recursive(client->debugfs); devres_release_group(&client->dev, client->devres_group_id); err_detach_pm_domain: dev_pm_domain_detach(&client->dev, do_power_on); @@ -627,6 +631,8 @@ static void i2c_device_remove(struct device *dev) driver->remove(client); } + debugfs_remove_recursive(client->debugfs); + devres_release_group(&client->dev, client->devres_group_id); dev_pm_domain_detach(&client->dev, true); @@ -1015,8 +1021,6 @@ i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *inf if (status) goto out_remove_swnode; - client->debugfs = debugfs_create_dir(dev_name(&client->dev), adap->debugfs); - dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n", client->name, dev_name(&client->dev)); @@ -1061,7 +1065,6 @@ void i2c_unregister_device(struct i2c_client *client) if (ACPI_COMPANION(&client->dev)) acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev)); - debugfs_remove_recursive(client->debugfs); device_remove_software_node(&client->dev); device_unregister(&client->dev); } @@ -1310,7 +1313,7 @@ new_device_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_WO(new_device); -static int __i2c_find_user_addr(struct device *dev, void *addrp) +static int __i2c_find_user_addr(struct device *dev, const void *addrp) { struct i2c_client *client = i2c_verify_client(dev); unsigned short addr = *(unsigned short *)addrp; diff --git a/drivers/leds/leds-turris-omnia.c b/drivers/leds/leds-turris-omnia.c index 7d3b24c8ecae..4fe1a9c0bc1b 100644 --- a/drivers/leds/leds-turris-omnia.c +++ b/drivers/leds/leds-turris-omnia.c @@ -438,7 +438,7 @@ static int omnia_mcu_get_features(const struct i2c_client *mcu_client) return reply; } -static int omnia_match_mcu_client(struct device *dev, void *data) +static int omnia_match_mcu_client(struct device *dev, const void *data) { struct i2c_client *client; diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index ec4ecd96e6b1..23c09d22fcdb 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2355,7 +2355,10 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) if (!bitmap) return -ENOENT; - + if (bitmap->mddev->bitmap_info.external) + return -ENOENT; + if (!bitmap->storage.sb_page) /* no superblock */ + return -EINVAL; sb = kmap_local_page(bitmap->storage.sb_page); stats->sync_size = le64_to_cpu(sb->sync_size); kunmap_local(sb); diff --git a/drivers/md/md.c b/drivers/md/md.c index 866015b681af..30b3dbbce2d2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -294,7 +294,7 @@ void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev) static struct ctl_table_header *raid_table_header; -static struct ctl_table raid_table[] = { +static const struct ctl_table raid_table[] = { { .procname = "speed_limit_min", .data = &sysctl_speed_limit_min, @@ -8376,6 +8376,10 @@ static int md_seq_show(struct seq_file *seq, void *v) return 0; spin_unlock(&all_mddevs_lock); + + /* prevent bitmap to be freed after checking */ + 
mutex_lock(&mddev->bitmap_info.mutex); + spin_lock(&mddev->lock); if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) { seq_printf(seq, "%s : ", mdname(mddev)); @@ -8451,6 +8455,7 @@ static int md_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n"); } spin_unlock(&mddev->lock); + mutex_unlock(&mddev->bitmap_info.mutex); spin_lock(&all_mddevs_lock); if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs)) diff --git a/drivers/media/pci/mgb4/mgb4_core.c b/drivers/media/pci/mgb4/mgb4_core.c index 8ceaed5c1453..f90ffc4dad52 100644 --- a/drivers/media/pci/mgb4/mgb4_core.c +++ b/drivers/media/pci/mgb4/mgb4_core.c @@ -125,7 +125,7 @@ static const struct hwmon_chip_info temp_chip_info = { }; #endif -static int match_i2c_adap(struct device *dev, void *data) +static int match_i2c_adap(struct device *dev, const void *data) { return i2c_verify_adapter(dev) ? 1 : 0; } @@ -141,7 +141,7 @@ static struct i2c_adapter *get_i2c_adap(struct platform_device *pdev) return dev ? to_i2c_adapter(dev) : NULL; } -static int match_spi_adap(struct device *dev, void *data) +static int match_spi_adap(struct device *dev, const void *data) { return to_spi_device(dev) ? 1 : 0; } diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 61b66e318488..7a3c34306de9 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -93,7 +93,7 @@ int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT; static int xpc_disengage_min_timelimit; /* = 0 */ static int xpc_disengage_max_timelimit = 120; -static struct ctl_table xpc_sys_xpc_hb[] = { +static const struct ctl_table xpc_sys_xpc_hb[] = { { .procname = "hb_interval", .data = &xpc_hb_interval, @@ -111,7 +111,7 @@ static struct ctl_table xpc_sys_xpc_hb[] = { .extra1 = &xpc_hb_check_min_interval, .extra2 = &xpc_hb_check_max_interval}, }; -static struct ctl_table xpc_sys_xpc[] = { +static const struct ctl_table xpc_sys_xpc[] = { { .procname = "disengage_timelimit", .data = &xpc_disengage_timelimit, diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index ee7e1d908986..847c11542f02 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -404,6 +404,7 @@ out_list_del: int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old) { unsigned long flags; + unsigned int memflags; lockdep_assert_held(&mtd_table_mutex); @@ -420,10 +421,10 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old) spin_unlock_irqrestore(&old->queue_lock, flags); /* freeze+quiesce queue to ensure all requests are flushed */ - blk_mq_freeze_queue(old->rq); + memflags = blk_mq_freeze_queue(old->rq); blk_mq_quiesce_queue(old->rq); blk_mq_unquiesce_queue(old->rq); - blk_mq_unfreeze_queue(old->rq); + blk_mq_unfreeze_queue(old->rq, memflags); /* If the device is currently open, tell trans driver to close it, then put mtd device, and don't touch it again */ diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 30be4ed68fad..ef6a22f372f9 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1537,7 +1537,7 @@ static int ubi_mtd_param_parse(const char *val, const struct kernel_param *kp) if (token) { int err = kstrtoint(token, 10, &p->ubi_num); - if (err) { + if (err || p->ubi_num < UBI_DEV_NUM_AUTO) { pr_err("UBI error: bad value for ubi_num parameter: %s\n", token); return -EINVAL; diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 6bb80d7714bc..b700a0efaa93 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -828,6 +828,70 @@ out_free: 
return err; } +static int ubi_get_ec_info(struct ubi_device *ubi, struct ubi_ecinfo_req __user *ureq) +{ + struct ubi_ecinfo_req req; + struct ubi_wl_entry *wl; + int read_cnt; + int peb; + int end_peb; + + /* Copy the input arguments */ + if (copy_from_user(&req, ureq, sizeof(struct ubi_ecinfo_req))) + return -EFAULT; + + /* Check input arguments */ + if (req.length <= 0 || req.start < 0 || req.start >= ubi->peb_count) + return -EINVAL; + + if (check_add_overflow(req.start, req.length, &end_peb)) + return -EINVAL; + + if (end_peb > ubi->peb_count) + end_peb = ubi->peb_count; + + /* Check access rights before filling erase_counters array */ + if (!access_ok((void __user *)ureq->erase_counters, + (end_peb-req.start) * sizeof(int32_t))) + return -EFAULT; + + /* Fill erase counter array */ + read_cnt = 0; + for (peb = req.start; peb < end_peb; read_cnt++, peb++) { + int ec; + + if (ubi_io_is_bad(ubi, peb)) { + if (__put_user(UBI_UNKNOWN, ureq->erase_counters+read_cnt)) + return -EFAULT; + + continue; + } + + spin_lock(&ubi->wl_lock); + + wl = ubi->lookuptbl[peb]; + if (wl) + ec = wl->ec; + else + ec = UBI_UNKNOWN; + + spin_unlock(&ubi->wl_lock); + + if (__put_user(ec, ureq->erase_counters+read_cnt)) + return -EFAULT; + + } + + /* Return actual read length */ + req.read_length = read_cnt; + + /* Copy everything except erase counter array */ + if (copy_to_user(ureq, &req, sizeof(struct ubi_ecinfo_req))) + return -EFAULT; + + return 0; +} + static long ubi_cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -991,6 +1055,12 @@ static long ubi_cdev_ioctl(struct file *file, unsigned int cmd, break; } + case UBI_IOCECNFO: + { + err = ubi_get_ec_info(ubi, argp); + break; + } + default: err = -ENOTTY; break; diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 26cc53ad34ec..c792b9bcab9b 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -549,7 +549,6 @@ struct ubi_debug_info { * @peb_buf: a buffer of PEB size used for different purposes * @buf_mutex: protects @peb_buf * @ckvol_mutex: serializes static volume checking when opening - * @wl_reboot_notifier: close all wear-leveling work before reboot * * @dbg: debugging information for this UBI device */ @@ -652,7 +651,6 @@ struct ubi_device { void *peb_buf; struct mutex buf_mutex; struct mutex ckvol_mutex; - struct notifier_block wl_reboot_notifier; struct ubi_debug_info dbg; }; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 4f6f339d8fb8..fbd399cf6503 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -89,7 +89,6 @@ #include <linux/crc32.h> #include <linux/freezer.h> #include <linux/kthread.h> -#include <linux/reboot.h> #include "ubi.h" #include "wl.h" @@ -128,8 +127,6 @@ static int self_check_in_wl_tree(const struct ubi_device *ubi, struct ubi_wl_entry *e, struct rb_root *root); static int self_check_in_pq(const struct ubi_device *ubi, struct ubi_wl_entry *e); -static int ubi_wl_reboot_notifier(struct notifier_block *n, - unsigned long state, void *cmd); /** * wl_tree_add - add a wear-leveling entry to a WL RB-tree. 
@@ -1953,13 +1950,6 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (!ubi->ro_mode && !ubi->fm_disabled) ubi_ensure_anchor_pebs(ubi); #endif - - if (!ubi->wl_reboot_notifier.notifier_call) { - ubi->wl_reboot_notifier.notifier_call = ubi_wl_reboot_notifier; - ubi->wl_reboot_notifier.priority = 1; /* Higher than MTD */ - register_reboot_notifier(&ubi->wl_reboot_notifier); - } - return 0; out_free: @@ -2005,17 +1995,6 @@ void ubi_wl_close(struct ubi_device *ubi) kfree(ubi->lookuptbl); } -static int ubi_wl_reboot_notifier(struct notifier_block *n, - unsigned long state, void *cmd) -{ - struct ubi_device *ubi; - - ubi = container_of(n, struct ubi_device, wl_reboot_notifier); - ubi_wl_close(ubi); - - return NOTIFY_DONE; -} - /** * self_check_ec - make sure that the erase counter of a PEB is correct. * @ubi: UBI device description object diff --git a/drivers/mux/core.c b/drivers/mux/core.c index 78c0022697ec..02be4ba37257 100644 --- a/drivers/mux/core.c +++ b/drivers/mux/core.c @@ -42,7 +42,7 @@ struct mux_state { unsigned int state; }; -static struct class mux_class = { +static const struct class mux_class = { .name = "mux", }; diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c index b19492a7f6ad..8adbec7c5084 100644 --- a/drivers/net/bonding/bond_debugfs.c +++ b/drivers/net/bonding/bond_debugfs.c @@ -63,13 +63,8 @@ void bond_debug_unregister(struct bonding *bond) void bond_debug_reregister(struct bonding *bond) { - struct dentry *d; - - d = debugfs_rename(bonding_debug_root, bond->debug_dir, - bonding_debug_root, bond->dev->name); - if (!IS_ERR(d)) { - bond->debug_dir = d; - } else { + int err = debugfs_change_name(bond->debug_dir, "%s", bond->dev->name); + if (err) { netdev_warn(bond->dev, "failed to reregister, so just unregister old one\n"); bond_debug_unregister(bond); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 7b78c2bada81..e45bba240cbc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1538,17 +1538,20 @@ static netdev_features_t bond_fix_features(struct net_device *dev, NETIF_F_HIGHDMA | NETIF_F_LRO) #define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE) + NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE | \ + NETIF_F_GSO_PARTIAL) #define BOND_MPLS_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_GSO_SOFTWARE) +#define BOND_GSO_PARTIAL_FEATURES (NETIF_F_GSO_ESP) + static void bond_compute_features(struct bonding *bond) { + netdev_features_t gso_partial_features = BOND_GSO_PARTIAL_FEATURES; unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; - netdev_features_t gso_partial_features = NETIF_F_GSO_ESP; netdev_features_t vlan_features = BOND_VLAN_FEATURES; netdev_features_t enc_features = BOND_ENC_FEATURES; #ifdef CONFIG_XFRM_OFFLOAD @@ -1582,8 +1585,9 @@ static void bond_compute_features(struct bonding *bond) BOND_XFRM_FEATURES); #endif /* CONFIG_XFRM_OFFLOAD */ - if (slave->dev->hw_enc_features & NETIF_F_GSO_PARTIAL) - gso_partial_features &= slave->dev->gso_partial_features; + gso_partial_features = netdev_increment_features(gso_partial_features, + slave->dev->gso_partial_features, + BOND_GSO_PARTIAL_FEATURES); mpls_features = netdev_increment_features(mpls_features, slave->dev->mpls_features, @@ -1598,12 +1602,8 @@ static void bond_compute_features(struct bonding *bond) } bond_dev->hard_header_len = max_hard_header_len; - if (gso_partial_features & NETIF_F_GSO_ESP) - 
bond_dev->gso_partial_features |= NETIF_F_GSO_ESP; - else - bond_dev->gso_partial_features &= ~NETIF_F_GSO_ESP; - done: + bond_dev->gso_partial_features = gso_partial_features; bond_dev->vlan_features = vlan_features; bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | NETIF_F_HW_VLAN_CTAG_TX | @@ -6046,6 +6046,7 @@ void bond_setup(struct net_device *bond_dev) bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; bond_dev->features |= bond_dev->hw_features; bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + bond_dev->features |= NETIF_F_GSO_PARTIAL; #ifdef CONFIG_XFRM_OFFLOAD bond_dev->hw_features |= BOND_XFRM_FEATURES; /* Only enable XFRM features if this is an active-backup config */ diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c index b0a6c96b6ef4..b35808d3d07f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c @@ -505,21 +505,6 @@ void xgbe_debugfs_exit(struct xgbe_prv_data *pdata) void xgbe_debugfs_rename(struct xgbe_prv_data *pdata) { - char *buf; - - if (!pdata->xgbe_debugfs) - return; - - buf = kasprintf(GFP_KERNEL, "amd-xgbe-%s", pdata->netdev->name); - if (!buf) - return; - - if (!strcmp(pdata->xgbe_debugfs->d_name.name, buf)) - goto out; - - debugfs_rename(pdata->xgbe_debugfs->d_parent, pdata->xgbe_debugfs, - pdata->xgbe_debugfs->d_parent, buf); - -out: - kfree(buf); + debugfs_change_name(pdata->xgbe_debugfs, + "amd-xgbe-%s", pdata->netdev->name); } diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index d73ef262991d..6fee9a41839c 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -328,8 +328,7 @@ #define BGMAC_RX_FRAME_OFFSET 30 /* There are 2 unused bytes between header and real data */ #define BGMAC_RX_BUF_OFFSET (NET_SKB_PAD + NET_IP_ALIGN - \ BGMAC_RX_FRAME_OFFSET) -/* Jumbo frame size with FCS */ -#define BGMAC_RX_MAX_FRAME_SIZE 9724 +#define BGMAC_RX_MAX_FRAME_SIZE 1536 #define BGMAC_RX_BUF_SIZE (BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE) #define BGMAC_RX_ALLOC_SIZE (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE + BGMAC_RX_BUF_OFFSET) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 9cc8db10a8d6..1c94bf1db718 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7424,7 +7424,7 @@ static void tg3_napi_enable(struct tg3 *tp) for (i = 0; i < tp->irq_cnt; i++) { tnapi = &tp->napi[i]; - napi_enable(&tnapi->napi); + napi_enable_locked(&tnapi->napi); if (tnapi->tx_buffers) { netif_queue_set_napi(tp->dev, txq_idx, NETDEV_QUEUE_TYPE_TX, @@ -7445,9 +7445,10 @@ static void tg3_napi_init(struct tg3 *tp) int i; for (i = 0; i < tp->irq_cnt; i++) { - netif_napi_add(tp->dev, &tp->napi[i].napi, - i ? tg3_poll_msix : tg3_poll); - netif_napi_set_irq(&tp->napi[i].napi, tp->napi[i].irq_vec); + netif_napi_add_locked(tp->dev, &tp->napi[i].napi, + i ? 
tg3_poll_msix : tg3_poll); + netif_napi_set_irq_locked(&tp->napi[i].napi, + tp->napi[i].irq_vec); } } @@ -11259,6 +11260,8 @@ static void tg3_timer_stop(struct tg3 *tp) static int tg3_restart_hw(struct tg3 *tp, bool reset_phy) __releases(tp->lock) __acquires(tp->lock) + __releases(tp->dev->lock) + __acquires(tp->dev->lock) { int err; @@ -11271,7 +11274,9 @@ static int tg3_restart_hw(struct tg3 *tp, bool reset_phy) tg3_timer_stop(tp); tp->irq_sync = 0; tg3_napi_enable(tp); + netdev_unlock(tp->dev); dev_close(tp->dev); + netdev_lock(tp->dev); tg3_full_lock(tp, 0); } return err; @@ -11299,6 +11304,7 @@ static void tg3_reset_task(struct work_struct *work) tg3_netif_stop(tp); + netdev_lock(tp->dev); tg3_full_lock(tp, 1); if (tg3_flag(tp, TX_RECOVERY_PENDING)) { @@ -11318,12 +11324,14 @@ static void tg3_reset_task(struct work_struct *work) * call cancel_work_sync() and wait forever. */ tg3_flag_clear(tp, RESET_TASK_PENDING); + netdev_unlock(tp->dev); dev_close(tp->dev); goto out; } tg3_netif_start(tp); tg3_full_unlock(tp); + netdev_unlock(tp->dev); tg3_phy_start(tp); tg3_flag_clear(tp, RESET_TASK_PENDING); out: @@ -11683,9 +11691,11 @@ static int tg3_start(struct tg3 *tp, bool reset_phy, bool test_irq, if (err) goto out_ints_fini; + netdev_lock(dev); tg3_napi_init(tp); tg3_napi_enable(tp); + netdev_unlock(dev); for (i = 0; i < tp->irq_cnt; i++) { err = tg3_request_irq(tp, i); @@ -12569,6 +12579,7 @@ static int tg3_set_ringparam(struct net_device *dev, irq_sync = 1; } + netdev_lock(dev); tg3_full_lock(tp, irq_sync); tp->rx_pending = ering->rx_pending; @@ -12597,6 +12608,7 @@ static int tg3_set_ringparam(struct net_device *dev, } tg3_full_unlock(tp); + netdev_unlock(dev); if (irq_sync && !err) tg3_phy_start(tp); @@ -12678,6 +12690,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam irq_sync = 1; } + netdev_lock(dev); tg3_full_lock(tp, irq_sync); if (epause->autoneg) @@ -12707,6 +12720,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam } tg3_full_unlock(tp); + netdev_unlock(dev); } tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED; @@ -13911,6 +13925,7 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, data[TG3_INTERRUPT_TEST] = 1; } + netdev_lock(dev); tg3_full_lock(tp, 0); tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); @@ -13922,6 +13937,7 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, } tg3_full_unlock(tp); + netdev_unlock(dev); if (irq_sync && !err2) tg3_phy_start(tp); @@ -14365,6 +14381,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) tg3_set_mtu(dev, tp, new_mtu); + netdev_lock(dev); tg3_full_lock(tp, 1); tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); @@ -14384,6 +14401,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) tg3_netif_start(tp); tg3_full_unlock(tp); + netdev_unlock(dev); if (!err) tg3_phy_start(tp); @@ -18164,6 +18182,7 @@ static int tg3_resume(struct device *device) netif_device_attach(dev); + netdev_lock(dev); tg3_full_lock(tp, 0); tg3_ape_driver_state_change(tp, RESET_KIND_INIT); @@ -18180,6 +18199,7 @@ static int tg3_resume(struct device *device) out: tg3_full_unlock(tp); + netdev_unlock(dev); if (!err) tg3_phy_start(tp); @@ -18260,7 +18280,9 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, done: if (state == pci_channel_io_perm_failure) { if (netdev) { + netdev_lock(netdev); tg3_napi_enable(tp); + netdev_unlock(netdev); dev_close(netdev); } err = PCI_ERS_RESULT_DISCONNECT; @@ -18314,7 +18336,9 @@ static 
pci_ers_result_t tg3_io_slot_reset(struct pci_dev *pdev) done: if (rc != PCI_ERS_RESULT_RECOVERED && netdev && netif_running(netdev)) { + netdev_lock(netdev); tg3_napi_enable(tp); + netdev_unlock(netdev); dev_close(netdev); } rtnl_unlock(); @@ -18340,12 +18364,14 @@ static void tg3_io_resume(struct pci_dev *pdev) if (!netdev || !netif_running(netdev)) goto done; + netdev_lock(netdev); tg3_full_lock(tp, 0); tg3_ape_driver_state_change(tp, RESET_KIND_INIT); tg3_flag_set(tp, INIT_COMPLETE); err = tg3_restart_hw(tp, true); if (err) { tg3_full_unlock(tp); + netdev_unlock(netdev); netdev_err(netdev, "Cannot restart hardware after reset.\n"); goto done; } @@ -18357,6 +18383,7 @@ static void tg3_io_resume(struct pci_dev *pdev) tg3_netif_start(tp); tg3_full_unlock(tp); + netdev_unlock(netdev); tg3_phy_start(tp); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 8735e333034c..b87eaf0c250c 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1777,10 +1777,11 @@ static void dm9000_drv_remove(struct platform_device *pdev) unregister_netdev(ndev); dm9000_release_board(pdev, dm); - free_netdev(ndev); /* free device structure */ if (dm->power_supply) regulator_disable(dm->power_supply); + free_netdev(ndev); /* free device structure */ + dev_dbg(&pdev->dev, "released and freed device\n"); } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 68725506a095..f7c4ce8e9a26 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -840,6 +840,8 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, struct fec_enet_private *fep = netdev_priv(ndev); int hdr_len, total_len, data_left; struct bufdesc *bdp = txq->bd.cur; + struct bufdesc *tmp_bdp; + struct bufdesc_ex *ebdp; struct tso_t tso; unsigned int index = 0; int ret; @@ -913,7 +915,34 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, return 0; err_release: - /* TODO: Release all used data descriptors for TSO */ + /* Release all used data descriptors for TSO */ + tmp_bdp = txq->bd.cur; + + while (tmp_bdp != bdp) { + /* Unmap data buffers */ + if (tmp_bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(tmp_bdp->cbd_bufaddr))) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(tmp_bdp->cbd_bufaddr), + fec16_to_cpu(tmp_bdp->cbd_datlen), + DMA_TO_DEVICE); + + /* Clear standard buffer descriptor fields */ + tmp_bdp->cbd_sc = 0; + tmp_bdp->cbd_datlen = 0; + tmp_bdp->cbd_bufaddr = 0; + + /* Handle extended descriptor if enabled */ + if (fep->bufdesc_ex) { + ebdp = (struct bufdesc_ex *)tmp_bdp; + ebdp->cbd_esc = 0; + } + + tmp_bdp = fec_enet_get_nextdesc(tmp_bdp, &txq->bd); + } + + dev_kfree_skb_any(skb); + return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index 9a63fbc69408..b25fb400f476 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -40,6 +40,21 @@ EXPORT_SYMBOL(hnae3_unregister_ae_algo_prepare); */ static DEFINE_MUTEX(hnae3_common_lock); +/* ensure the drivers being unloaded one by one */ +static DEFINE_MUTEX(hnae3_unload_lock); + +void hnae3_acquire_unload_lock(void) +{ + mutex_lock(&hnae3_unload_lock); +} +EXPORT_SYMBOL(hnae3_acquire_unload_lock); + +void hnae3_release_unload_lock(void) +{ + mutex_unlock(&hnae3_unload_lock); +} +EXPORT_SYMBOL(hnae3_release_unload_lock); + static bool hnae3_client_match(enum 
hnae3_client_type client_type) { if (client_type == HNAE3_CLIENT_KNIC || diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 12ba380eb701..4e44f28288f9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -963,4 +963,6 @@ int hnae3_register_client(struct hnae3_client *client); void hnae3_set_client_init_flag(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev, unsigned int inited); +void hnae3_acquire_unload_lock(void); +void hnae3_release_unload_lock(void); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a7e3b22f641c..9ff797fb36c4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -6002,9 +6002,11 @@ module_init(hns3_init_module); */ static void __exit hns3_exit_module(void) { + hnae3_acquire_unload_lock(); pci_unregister_driver(&hns3_driver); hnae3_unregister_client(&client); hns3_dbg_unregister_debugfs(); + hnae3_release_unload_lock(); } module_exit(hns3_exit_module); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index db7845009252..3f17b3073e50 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -12919,9 +12919,11 @@ static int __init hclge_init(void) static void __exit hclge_exit(void) { + hnae3_acquire_unload_lock(); hnae3_unregister_ae_algo_prepare(&ae_algo); hnae3_unregister_ae_algo(&ae_algo); destroy_workqueue(hclge_wq); + hnae3_release_unload_lock(); } module_init(hclge_init); module_exit(hclge_exit); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 163c6e59ea4c..9ba767740a04 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3410,8 +3410,10 @@ static int __init hclgevf_init(void) static void __exit hclgevf_exit(void) { + hnae3_acquire_unload_lock(); hnae3_unregister_ae_algo(&ae_algovf); destroy_workqueue(hclgevf_wq); + hnae3_release_unload_lock(); } module_init(hclgevf_init); module_exit(hclgevf_exit); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index cbfaaa5b7d02..2d7a18fcc3be 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -773,6 +773,11 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, f->state = IAVF_VLAN_ADD; adapter->num_vlan_filters++; iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER); + } else if (f->state == IAVF_VLAN_REMOVE) { + /* IAVF_VLAN_REMOVE means that VLAN wasn't yet removed. + * We can safely only change the state here. + */ + f->state = IAVF_VLAN_ACTIVE; } clearout: @@ -793,8 +798,18 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan) f = iavf_find_vlan(adapter, vlan); if (f) { - f->state = IAVF_VLAN_REMOVE; - iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_DEL_VLAN_FILTER); + /* IAVF_ADD_VLAN means that VLAN wasn't even added yet. + * Remove it from the list. 
+ */ + if (f->state == IAVF_VLAN_ADD) { + list_del(&f->list); + kfree(f); + adapter->num_vlan_filters--; + } else { + f->state = IAVF_VLAN_REMOVE; + iavf_schedule_aq_request(adapter, + IAVF_FLAG_AQ_DEL_VLAN_FILTER); + } } spin_unlock_bh(&adapter->mac_vlan_list_lock); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 01536a382e54..bdee499f991a 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1498,7 +1498,6 @@ struct ice_aqc_dnl_equa_param { #define ICE_AQC_RX_EQU_POST1 (0x12 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_BFLF (0x13 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_BFHF (0x14 << ICE_AQC_RX_EQU_SHIFT) -#define ICE_AQC_RX_EQU_DRATE (0x15 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_CTLE_GAINHF (0x20 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_CTLE_GAINLF (0x21 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_CTLE_GAINDC (0x22 << ICE_AQC_RX_EQU_SHIFT) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 3072634bf049..f241493a6ac8 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -710,7 +710,6 @@ static int ice_get_tx_rx_equa(struct ice_hw *hw, u8 serdes_num, { ICE_AQC_RX_EQU_POST1, rx, &ptr->rx_equ_post1 }, { ICE_AQC_RX_EQU_BFLF, rx, &ptr->rx_equ_bflf }, { ICE_AQC_RX_EQU_BFHF, rx, &ptr->rx_equ_bfhf }, - { ICE_AQC_RX_EQU_DRATE, rx, &ptr->rx_equ_drate }, { ICE_AQC_RX_EQU_CTLE_GAINHF, rx, &ptr->rx_equ_ctle_gainhf }, { ICE_AQC_RX_EQU_CTLE_GAINLF, rx, &ptr->rx_equ_ctle_gainlf }, { ICE_AQC_RX_EQU_CTLE_GAINDC, rx, &ptr->rx_equ_ctle_gaindc }, diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.h b/drivers/net/ethernet/intel/ice/ice_ethtool.h index 8f2ad1c172c0..23b2cfbc9684 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.h +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.h @@ -15,7 +15,6 @@ struct ice_serdes_equalization_to_ethtool { int rx_equ_post1; int rx_equ_bflf; int rx_equ_bfhf; - int rx_equ_drate; int rx_equ_ctle_gainhf; int rx_equ_ctle_gainlf; int rx_equ_ctle_gaindc; diff --git a/drivers/net/ethernet/intel/ice/ice_parser.h b/drivers/net/ethernet/intel/ice/ice_parser.h index 6509d807627c..4f56d53d56b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_parser.h +++ b/drivers/net/ethernet/intel/ice/ice_parser.h @@ -257,7 +257,6 @@ ice_pg_nm_cam_match(struct ice_pg_nm_cam_item *table, int size, /*** ICE_SID_RXPARSER_BOOST_TCAM and ICE_SID_LBL_RXPARSER_TMEM sections ***/ #define ICE_BST_TCAM_TABLE_SIZE 256 #define ICE_BST_TCAM_KEY_SIZE 20 -#define ICE_BST_KEY_TCAM_SIZE 19 /* Boost TCAM item */ struct ice_bst_tcam_item { @@ -401,7 +400,6 @@ u16 ice_xlt_kb_flag_get(struct ice_xlt_kb *kb, u64 pkt_flag); #define ICE_PARSER_GPR_NUM 128 #define ICE_PARSER_FLG_NUM 64 #define ICE_PARSER_ERR_NUM 16 -#define ICE_BST_KEY_SIZE 10 #define ICE_MARKER_ID_SIZE 9 #define ICE_MARKER_MAX_SIZE \ (ICE_MARKER_ID_SIZE * BITS_PER_BYTE - 1) @@ -431,13 +429,13 @@ struct ice_parser_rt { u8 pkt_buf[ICE_PARSER_MAX_PKT_LEN + ICE_PARSER_PKT_REV]; u16 pkt_len; u16 po; - u8 bst_key[ICE_BST_KEY_SIZE]; + u8 bst_key[ICE_BST_TCAM_KEY_SIZE]; struct ice_pg_cam_key pg_key; + u8 pg_prio; struct ice_alu *alu0; struct ice_alu *alu1; struct ice_alu *alu2; struct ice_pg_cam_action *action; - u8 pg_prio; struct ice_gpr_pu pu; u8 markers[ICE_MARKER_ID_SIZE]; bool protocols[ICE_PO_PAIR_SIZE]; diff --git a/drivers/net/ethernet/intel/ice/ice_parser_rt.c 
b/drivers/net/ethernet/intel/ice/ice_parser_rt.c index dedf5e854e4b..3995d662e050 100644 --- a/drivers/net/ethernet/intel/ice/ice_parser_rt.c +++ b/drivers/net/ethernet/intel/ice/ice_parser_rt.c @@ -125,22 +125,20 @@ static void ice_bst_key_init(struct ice_parser_rt *rt, else key[idd] = imem->b_kb.prio; - idd = ICE_BST_KEY_TCAM_SIZE - 1; + idd = ICE_BST_TCAM_KEY_SIZE - 2; for (i = idd; i >= 0; i--) { int j; j = ho + idd - i; if (j < ICE_PARSER_MAX_PKT_LEN) - key[i] = rt->pkt_buf[ho + idd - i]; + key[i] = rt->pkt_buf[j]; else key[i] = 0; } - ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Generated Boost TCAM Key:\n"); - ice_debug(rt->psr->hw, ICE_DBG_PARSER, "%02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n", - key[0], key[1], key[2], key[3], key[4], - key[5], key[6], key[7], key[8], key[9]); - ice_debug(rt->psr->hw, ICE_DBG_PARSER, "\n"); + ice_debug_array_w_prefix(rt->psr->hw, ICE_DBG_PARSER, + KBUILD_MODNAME ": Generated Boost TCAM Key", + key, ICE_BST_TCAM_KEY_SIZE); } static u16 ice_bit_rev_u16(u16 v, int len) diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c index 4849590a5591..b28991dd1870 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c @@ -376,6 +376,9 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, if (!(le16_to_cpu(desc->flags) & IDPF_CTLQ_FLAG_DD)) break; + /* Ensure no other fields are read until DD flag is checked */ + dma_rmb(); + /* strip off FW internal code */ desc_err = le16_to_cpu(desc->ret_val) & 0xff; @@ -563,6 +566,9 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, if (!(flags & IDPF_CTLQ_FLAG_DD)) break; + /* Ensure no other fields are read until DD flag is checked */ + dma_rmb(); + q_msg[i].vmvf_type = (flags & (IDPF_CTLQ_FLAG_FTYPE_VM | IDPF_CTLQ_FLAG_FTYPE_PF)) >> diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index f71d3182580b..b6c515d14cbf 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -174,7 +174,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); pci_set_drvdata(pdev, adapter); - adapter->init_wq = alloc_workqueue("%s-%s-init", 0, 0, + adapter->init_wq = alloc_workqueue("%s-%s-init", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->init_wq) { @@ -183,7 +184,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_free; } - adapter->serv_wq = alloc_workqueue("%s-%s-service", 0, 0, + adapter->serv_wq = alloc_workqueue("%s-%s-service", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->serv_wq) { @@ -192,7 +194,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_serv_wq_alloc; } - adapter->mbx_wq = alloc_workqueue("%s-%s-mbx", 0, 0, + adapter->mbx_wq = alloc_workqueue("%s-%s-mbx", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->mbx_wq) { @@ -201,7 +204,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_mbx_wq_alloc; } - adapter->stats_wq = alloc_workqueue("%s-%s-stats", 0, 0, + adapter->stats_wq = alloc_workqueue("%s-%s-stats", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->stats_wq) { @@ -210,7 +214,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct 
pci_device_id *ent) goto err_stats_wq_alloc; } - adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event", 0, 0, + adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->vc_event_wq) { diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index d46c95f91b0d..3d2413b8684f 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -517,8 +517,10 @@ static ssize_t idpf_vc_xn_exec(struct idpf_adapter *adapter, retval = -ENXIO; goto only_unlock; case IDPF_VC_XN_WAITING: - dev_notice_ratelimited(&adapter->pdev->dev, "Transaction timed-out (op %d, %dms)\n", - params->vc_op, params->timeout_ms); + dev_notice_ratelimited(&adapter->pdev->dev, + "Transaction timed-out (op:%d cookie:%04x vc_op:%d salt:%02x timeout:%dms)\n", + params->vc_op, cookie, xn->vc_op, + xn->salt, params->timeout_ms); retval = -ETIME; break; case IDPF_VC_XN_COMPLETED_SUCCESS: @@ -612,14 +614,16 @@ idpf_vc_xn_forward_reply(struct idpf_adapter *adapter, return -EINVAL; } xn = &adapter->vcxn_mngr->ring[xn_idx]; + idpf_vc_xn_lock(xn); salt = FIELD_GET(IDPF_VC_XN_SALT_M, msg_info); if (xn->salt != salt) { - dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (%02x != %02x)\n", - xn->salt, salt); + dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (exp:%d@%02x(%d) != got:%d@%02x)\n", + xn->vc_op, xn->salt, xn->state, + ctlq_msg->cookie.mbx.chnl_opcode, salt); + idpf_vc_xn_unlock(xn); return -EINVAL; } - idpf_vc_xn_lock(xn); switch (xn->state) { case IDPF_VC_XN_WAITING: /* success */ @@ -3077,12 +3081,21 @@ init_failed: */ void idpf_vc_core_deinit(struct idpf_adapter *adapter) { + bool remove_in_prog; + if (!test_bit(IDPF_VC_CORE_INIT, adapter->flags)) return; + /* Avoid transaction timeouts when called during reset */ + remove_in_prog = test_bit(IDPF_REMOVE_IN_PROG, adapter->flags); + if (!remove_in_prog) + idpf_vc_xn_shutdown(adapter->vcxn_mngr); + idpf_deinit_task(adapter); idpf_intr_rel(adapter); - idpf_vc_xn_shutdown(adapter->vcxn_mngr); + + if (remove_in_prog) + idpf_vc_xn_shutdown(adapter->vcxn_mngr); cancel_delayed_work_sync(&adapter->serv_task); cancel_delayed_work_sync(&adapter->mbx_task); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 82f4333fb426..4fe121b9f94b 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4432,6 +4432,7 @@ static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node) */ if (pp->is_stopped) { spin_unlock(&pp->lock); + netdev_unlock(port->napi.dev); return 0; } netif_tx_stop_all_queues(pp->dev); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 148144f5b61d..a1f9ec03c2ce 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -917,19 +917,18 @@ static void print_npa_qsize(struct seq_file *m, struct rvu_pfvf *pfvf) /* The 'qsize' entry dumps current Aura/Pool context Qsize * and each context's current enable/disable status in a bitmap. 
*/ -static int rvu_dbg_qsize_display(struct seq_file *filp, void *unsused, +static int rvu_dbg_qsize_display(struct seq_file *s, void *unsused, int blktype) { - void (*print_qsize)(struct seq_file *filp, + void (*print_qsize)(struct seq_file *s, struct rvu_pfvf *pfvf) = NULL; - struct dentry *current_dir; struct rvu_pfvf *pfvf; struct rvu *rvu; int qsize_id; u16 pcifunc; int blkaddr; - rvu = filp->private; + rvu = s->private; switch (blktype) { case BLKTYPE_NPA: qsize_id = rvu->rvu_dbg.npa_qsize_id; @@ -945,32 +944,28 @@ static int rvu_dbg_qsize_display(struct seq_file *filp, void *unsused, return -EINVAL; } - if (blktype == BLKTYPE_NPA) { + if (blktype == BLKTYPE_NPA) blkaddr = BLKADDR_NPA; - } else { - current_dir = filp->file->f_path.dentry->d_parent; - blkaddr = (!strcmp(current_dir->d_name.name, "nix1") ? - BLKADDR_NIX1 : BLKADDR_NIX0); - } + else + blkaddr = debugfs_get_aux_num(s->file); if (!rvu_dbg_is_valid_lf(rvu, blkaddr, qsize_id, &pcifunc)) return -EINVAL; pfvf = rvu_get_pfvf(rvu, pcifunc); - print_qsize(filp, pfvf); + print_qsize(s, pfvf); return 0; } -static ssize_t rvu_dbg_qsize_write(struct file *filp, +static ssize_t rvu_dbg_qsize_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int blktype) { char *blk_string = (blktype == BLKTYPE_NPA) ? "npa" : "nix"; - struct seq_file *seqfile = filp->private_data; + struct seq_file *seqfile = file->private_data; char *cmd_buf, *cmd_buf_tmp, *subtoken; struct rvu *rvu = seqfile->private; - struct dentry *current_dir; int blkaddr; u16 pcifunc; int ret, lf; @@ -996,13 +991,10 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, goto qsize_write_done; } - if (blktype == BLKTYPE_NPA) { + if (blktype == BLKTYPE_NPA) blkaddr = BLKADDR_NPA; - } else { - current_dir = filp->f_path.dentry->d_parent; - blkaddr = (!strcmp(current_dir->d_name.name, "nix1") ? 
- BLKADDR_NIX1 : BLKADDR_NIX0); - } + else + blkaddr = debugfs_get_aux_num(file); if (!rvu_dbg_is_valid_lf(rvu, blkaddr, lf, &pcifunc)) { ret = -EINVAL; @@ -2704,8 +2696,8 @@ static void rvu_dbg_nix_init(struct rvu *rvu, int blkaddr) &rvu_dbg_nix_ndc_tx_hits_miss_fops); debugfs_create_file("ndc_rx_hits_miss", 0600, rvu->rvu_dbg.nix, nix_hw, &rvu_dbg_nix_ndc_rx_hits_miss_fops); - debugfs_create_file("qsize", 0600, rvu->rvu_dbg.nix, rvu, - &rvu_dbg_nix_qsize_fops); + debugfs_create_file_aux_num("qsize", 0600, rvu->rvu_dbg.nix, rvu, + blkaddr, &rvu_dbg_nix_qsize_fops); debugfs_create_file("ingress_policer_ctx", 0600, rvu->rvu_dbg.nix, nix_hw, &rvu_dbg_nix_band_prof_ctx_fops); debugfs_create_file("ingress_policer_rsrc", 0600, rvu->rvu_dbg.nix, nix_hw, @@ -2854,28 +2846,14 @@ static int cgx_print_stats(struct seq_file *s, int lmac_id) return err; } -static int rvu_dbg_derive_lmacid(struct seq_file *filp, int *lmac_id) +static int rvu_dbg_derive_lmacid(struct seq_file *s) { - struct dentry *current_dir; - char *buf; - - current_dir = filp->file->f_path.dentry->d_parent; - buf = strrchr(current_dir->d_name.name, 'c'); - if (!buf) - return -EINVAL; - - return kstrtoint(buf + 1, 10, lmac_id); + return debugfs_get_aux_num(s->file); } -static int rvu_dbg_cgx_stat_display(struct seq_file *filp, void *unused) +static int rvu_dbg_cgx_stat_display(struct seq_file *s, void *unused) { - int lmac_id, err; - - err = rvu_dbg_derive_lmacid(filp, &lmac_id); - if (!err) - return cgx_print_stats(filp, lmac_id); - - return err; + return cgx_print_stats(s, rvu_dbg_derive_lmacid(s)); } RVU_DEBUG_SEQ_FOPS(cgx_stat, cgx_stat_display, NULL); @@ -2933,15 +2911,9 @@ static int cgx_print_dmac_flt(struct seq_file *s, int lmac_id) return 0; } -static int rvu_dbg_cgx_dmac_flt_display(struct seq_file *filp, void *unused) +static int rvu_dbg_cgx_dmac_flt_display(struct seq_file *s, void *unused) { - int err, lmac_id; - - err = rvu_dbg_derive_lmacid(filp, &lmac_id); - if (!err) - return cgx_print_dmac_flt(filp, lmac_id); - - return err; + return cgx_print_dmac_flt(s, rvu_dbg_derive_lmacid(s)); } RVU_DEBUG_SEQ_FOPS(cgx_dmac_flt, cgx_dmac_flt_display, NULL); @@ -2980,10 +2952,10 @@ static void rvu_dbg_cgx_init(struct rvu *rvu) rvu->rvu_dbg.lmac = debugfs_create_dir(dname, rvu->rvu_dbg.cgx); - debugfs_create_file("stats", 0600, rvu->rvu_dbg.lmac, - cgx, &rvu_dbg_cgx_stat_fops); - debugfs_create_file("mac_filter", 0600, - rvu->rvu_dbg.lmac, cgx, + debugfs_create_file_aux_num("stats", 0600, rvu->rvu_dbg.lmac, + cgx, lmac_id, &rvu_dbg_cgx_stat_fops); + debugfs_create_file_aux_num("mac_filter", 0600, + rvu->rvu_dbg.lmac, cgx, lmac_id, &rvu_dbg_cgx_dmac_flt_fops); } } diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 25bf6ec44289..a1bada9eaaf6 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -3742,10 +3742,7 @@ static int skge_device_event(struct notifier_block *unused, skge = netdev_priv(dev); switch (event) { case NETDEV_CHANGENAME: - if (skge->debugfs) - skge->debugfs = debugfs_rename(skge_debug, - skge->debugfs, - skge_debug, dev->name); + debugfs_change_name(skge->debugfs, "%s", dev->name); break; case NETDEV_GOING_DOWN: diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 988fa28cfb5f..d7121c836508 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4494,10 +4494,7 @@ static int sky2_device_event(struct notifier_block *unused, switch (event) { case 
NETDEV_CHANGENAME: - if (sky2->debugfs) { - sky2->debugfs = debugfs_rename(sky2_debug, sky2->debugfs, - sky2_debug, dev->name); - } + debugfs_change_name(sky2->debugfs, "%s", dev->name); break; case NETDEV_GOING_DOWN: diff --git a/drivers/net/ethernet/mediatek/airoha_eth.c b/drivers/net/ethernet/mediatek/airoha_eth.c index 415d784de741..09f448f29124 100644 --- a/drivers/net/ethernet/mediatek/airoha_eth.c +++ b/drivers/net/ethernet/mediatek/airoha_eth.c @@ -266,11 +266,11 @@ #define REG_GDM3_FWD_CFG GDM3_BASE #define GDM3_PAD_EN_MASK BIT(28) -#define REG_GDM4_FWD_CFG (GDM4_BASE + 0x100) +#define REG_GDM4_FWD_CFG GDM4_BASE #define GDM4_PAD_EN_MASK BIT(28) #define GDM4_SPORT_OFFSET0_MASK GENMASK(11, 8) -#define REG_GDM4_SRC_PORT_SET (GDM4_BASE + 0x33c) +#define REG_GDM4_SRC_PORT_SET (GDM4_BASE + 0x23c) #define GDM4_SPORT_OFF2_MASK GENMASK(19, 16) #define GDM4_SPORT_OFF1_MASK GENMASK(15, 12) #define GDM4_SPORT_OFF0_MASK GENMASK(11, 8) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bd41b75d246e..a814b63ed97e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2087,7 +2087,7 @@ static struct mlx5e_xdpsq *mlx5e_open_xdpredirect_sq(struct mlx5e_channel *c, struct mlx5e_xdpsq *xdpsq; int err; - xdpsq = kvzalloc_node(sizeof(*xdpsq), GFP_KERNEL, c->cpu); + xdpsq = kvzalloc_node(sizeof(*xdpsq), GFP_KERNEL, cpu_to_node(c->cpu)); if (!xdpsq) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 720f577929db..499e5e39d513 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1120,20 +1120,6 @@ static void nv_disable_hw_interrupts(struct net_device *dev, u32 mask) } } -static void nv_napi_enable(struct net_device *dev) -{ - struct fe_priv *np = get_nvpriv(dev); - - napi_enable(&np->napi); -} - -static void nv_napi_disable(struct net_device *dev) -{ - struct fe_priv *np = get_nvpriv(dev); - - napi_disable(&np->napi); -} - #define MII_READ (-1) /* mii_rw: read/write a register on the PHY. * @@ -3114,7 +3100,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) * Changing the MTU is a rare event, it shouldn't matter. 
*/ nv_disable_irq(dev); - nv_napi_disable(dev); + napi_disable(&np->napi); netif_tx_lock_bh(dev); netif_addr_lock(dev); spin_lock(&np->lock); @@ -3143,7 +3129,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) spin_unlock(&np->lock); netif_addr_unlock(dev); netif_tx_unlock_bh(dev); - nv_napi_enable(dev); + napi_enable(&np->napi); nv_enable_irq(dev); } return 0; @@ -4731,7 +4717,7 @@ static int nv_set_ringparam(struct net_device *dev, if (netif_running(dev)) { nv_disable_irq(dev); - nv_napi_disable(dev); + napi_disable(&np->napi); netif_tx_lock_bh(dev); netif_addr_lock(dev); spin_lock(&np->lock); @@ -4784,7 +4770,7 @@ static int nv_set_ringparam(struct net_device *dev, spin_unlock(&np->lock); netif_addr_unlock(dev); netif_tx_unlock_bh(dev); - nv_napi_enable(dev); + napi_enable(&np->napi); nv_enable_irq(dev); } return 0; @@ -5277,7 +5263,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64 if (test->flags & ETH_TEST_FL_OFFLINE) { if (netif_running(dev)) { netif_stop_queue(dev); - nv_napi_disable(dev); + napi_disable(&np->napi); netif_tx_lock_bh(dev); netif_addr_lock(dev); spin_lock_irq(&np->lock); @@ -5334,7 +5320,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64 /* restart rx engine */ nv_start_rxtx(dev); netif_start_queue(dev); - nv_napi_enable(dev); + napi_enable(&np->napi); nv_enable_hw_interrupts(dev, np->irqmask); } } @@ -5576,6 +5562,7 @@ static int nv_open(struct net_device *dev) /* ask for interrupts */ nv_enable_hw_interrupts(dev, np->irqmask); + netdev_lock(dev); spin_lock_irq(&np->lock); writel(NVREG_MCASTADDRA_FORCE, base + NvRegMulticastAddrA); writel(0, base + NvRegMulticastAddrB); @@ -5594,7 +5581,7 @@ static int nv_open(struct net_device *dev) ret = nv_update_linkspeed(dev); nv_start_rxtx(dev); netif_start_queue(dev); - nv_napi_enable(dev); + napi_enable_locked(&np->napi); if (ret) { netif_carrier_on(dev); @@ -5611,6 +5598,7 @@ static int nv_open(struct net_device *dev) round_jiffies(jiffies + STATS_INTERVAL)); spin_unlock_irq(&np->lock); + netdev_unlock(dev); /* If the loopback feature was set while the device was down, make sure * that it's set correctly now. @@ -5632,7 +5620,7 @@ static int nv_close(struct net_device *dev) spin_lock_irq(&np->lock); np->in_shutdown = 1; spin_unlock_irq(&np->lock); - nv_napi_disable(dev); + napi_disable(&np->napi); synchronize_irq(np->pci_dev->irq); del_timer_sync(&np->oom_kick); diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 9ce0e8a64ba8..a73dcaffa8c5 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -1684,6 +1684,7 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) if (tmp8 & CmdTxEnb) RTL_W8 (ChipCmd, CmdRxEnb); + netdev_lock(dev); spin_lock_bh(&tp->rx_lock); /* Disable interrupts by clearing the interrupt mask. 
*/ RTL_W16 (IntrMask, 0x0000); @@ -1694,11 +1695,12 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) spin_unlock_irq(&tp->lock); /* ...and finally, reset everything */ - napi_enable(&tp->napi); + napi_enable_locked(&tp->napi); rtl8139_hw_start(dev); netif_wake_queue(dev); spin_unlock_bh(&tp->rx_lock); + netdev_unlock(dev); } static void rtl8139_tx_timeout(struct net_device *dev, unsigned int txqueue) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index bc395294a32d..c9f4976a3527 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -3217,10 +3217,15 @@ static int ravb_suspend(struct device *dev) netif_device_detach(ndev); - if (priv->wol_enabled) - return ravb_wol_setup(ndev); + rtnl_lock(); + if (priv->wol_enabled) { + ret = ravb_wol_setup(ndev); + rtnl_unlock(); + return ret; + } ret = ravb_close(ndev); + rtnl_unlock(); if (ret) return ret; @@ -3245,19 +3250,20 @@ static int ravb_resume(struct device *dev) if (!netif_running(ndev)) return 0; + rtnl_lock(); /* If WoL is enabled restore the interface. */ - if (priv->wol_enabled) { + if (priv->wol_enabled) ret = ravb_wol_restore(ndev); - if (ret) - return ret; - } else { + else ret = pm_runtime_force_resume(dev); - if (ret) - return ret; + if (ret) { + rtnl_unlock(); + return ret; } /* Reopening the interface will restore the device to the working state. */ ret = ravb_open(ndev); + rtnl_unlock(); if (ret < 0) goto out_rpm_put; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 8887b8921009..5fc8027c92c7 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3494,10 +3494,12 @@ static int sh_eth_suspend(struct device *dev) netif_device_detach(ndev); + rtnl_lock(); if (mdp->wol_enabled) ret = sh_eth_wol_setup(ndev); else ret = sh_eth_close(ndev); + rtnl_unlock(); return ret; } @@ -3511,10 +3513,12 @@ static int sh_eth_resume(struct device *dev) if (!netif_running(ndev)) return 0; + rtnl_lock(); if (mdp->wol_enabled) ret = sh_eth_wol_restore(ndev); else ret = sh_eth_open(ndev); + rtnl_unlock(); if (ret < 0) return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index edbf8994455d..d04543e5697b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2424,11 +2424,6 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) u32 chan = 0; u8 qmode = 0; - if (rxfifosz == 0) - rxfifosz = priv->dma_cap.rx_fifo_size; - if (txfifosz == 0) - txfifosz = priv->dma_cap.tx_fifo_size; - /* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */ if (priv->plat->has_gmac4 || priv->plat->has_xgmac) { rxfifosz /= rx_channels_count; @@ -2897,11 +2892,6 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, int rxfifosz = priv->plat->rx_fifo_size; int txfifosz = priv->plat->tx_fifo_size; - if (rxfifosz == 0) - rxfifosz = priv->dma_cap.rx_fifo_size; - if (txfifosz == 0) - txfifosz = priv->dma_cap.tx_fifo_size; - /* Adjust for real per queue fifo size */ rxfifosz /= rx_channels_count; txfifosz /= tx_channels_count; @@ -5878,9 +5868,6 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) const int mtu = new_mtu; int ret; - if (txfifosz == 0) - txfifosz = priv->dma_cap.tx_fifo_size; - txfifosz /= priv->plat->tx_queues_to_use; if (stmmac_xdp_is_enabled(priv) && 
new_mtu > ETH_DATA_LEN) { @@ -6535,11 +6522,7 @@ static int stmmac_device_event(struct notifier_block *unused, switch (event) { case NETDEV_CHANGENAME: - if (priv->dbgfs_dir) - priv->dbgfs_dir = debugfs_rename(stmmac_fs_dir, - priv->dbgfs_dir, - stmmac_fs_dir, - dev->name); + debugfs_change_name(priv->dbgfs_dir, "%s", dev->name); break; } done: @@ -7221,6 +7204,50 @@ static int stmmac_hw_init(struct stmmac_priv *priv) if (priv->dma_cap.tsoen) dev_info(priv->device, "TSO supported\n"); + if (priv->dma_cap.number_rx_queues && + priv->plat->rx_queues_to_use > priv->dma_cap.number_rx_queues) { + dev_warn(priv->device, + "Number of Rx queues (%u) exceeds dma capability\n", + priv->plat->rx_queues_to_use); + priv->plat->rx_queues_to_use = priv->dma_cap.number_rx_queues; + } + if (priv->dma_cap.number_tx_queues && + priv->plat->tx_queues_to_use > priv->dma_cap.number_tx_queues) { + dev_warn(priv->device, + "Number of Tx queues (%u) exceeds dma capability\n", + priv->plat->tx_queues_to_use); + priv->plat->tx_queues_to_use = priv->dma_cap.number_tx_queues; + } + + if (!priv->plat->rx_fifo_size) { + if (priv->dma_cap.rx_fifo_size) { + priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; + } else { + dev_err(priv->device, "Can't specify Rx FIFO size\n"); + return -ENODEV; + } + } else if (priv->dma_cap.rx_fifo_size && + priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { + dev_warn(priv->device, + "Rx FIFO size (%u) exceeds dma capability\n", + priv->plat->rx_fifo_size); + priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; + } + if (!priv->plat->tx_fifo_size) { + if (priv->dma_cap.tx_fifo_size) { + priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size; + } else { + dev_err(priv->device, "Can't specify Tx FIFO size\n"); + return -ENODEV; + } + } else if (priv->dma_cap.tx_fifo_size && + priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { + dev_warn(priv->device, + "Tx FIFO size (%u) exceeds dma capability\n", + priv->plat->tx_fifo_size); + priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size; + } + priv->hw->vlan_fail_q_en = (priv->plat->flags & STMMAC_FLAG_VLAN_FAIL_Q_EN); priv->hw->vlan_fail_q = priv->plat->vlan_fail_q; diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index d7459866d24c..72177fea1cfb 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6086,7 +6086,7 @@ static void niu_enable_napi(struct niu *np) int i; for (i = 0; i < np->num_ldg; i++) - napi_enable(&np->ldg[i].napi); + napi_enable_locked(&np->ldg[i].napi); } static void niu_disable_napi(struct niu *np) @@ -6116,7 +6116,9 @@ static int niu_open(struct net_device *dev) if (err) goto out_free_channels; + netdev_lock(dev); niu_enable_napi(np); + netdev_unlock(dev); spin_lock_irq(&np->lock); @@ -6521,6 +6523,7 @@ static void niu_reset_task(struct work_struct *work) niu_reset_buffers(np); + netdev_lock(np->dev); spin_lock_irqsave(&np->lock, flags); err = niu_init_hw(np); @@ -6531,6 +6534,7 @@ static void niu_reset_task(struct work_struct *work) } spin_unlock_irqrestore(&np->lock, flags); + netdev_unlock(np->dev); } static void niu_tx_timeout(struct net_device *dev, unsigned int txqueue) @@ -6761,7 +6765,9 @@ static int niu_change_mtu(struct net_device *dev, int new_mtu) niu_free_channels(np); + netdev_lock(dev); niu_enable_napi(np); + netdev_unlock(dev); err = niu_alloc_channels(np); if (err) @@ -9937,6 +9943,7 @@ static int __maybe_unused niu_resume(struct device *dev_d) spin_lock_irqsave(&np->lock, flags); + netdev_lock(dev); err = niu_init_hw(np); if (!err) { 
np->timer.expires = jiffies + HZ; @@ -9945,6 +9952,7 @@ static int __maybe_unused niu_resume(struct device *dev_d) } spin_unlock_irqrestore(&np->lock, flags); + netdev_unlock(dev); return err; } diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 894911f3d560..e56ebbdd428d 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -1568,7 +1568,7 @@ static void init_registers(struct net_device *dev) if (rp->quirks & rqMgmt) rhine_init_cam_filter(dev); - napi_enable(&rp->napi); + napi_enable_locked(&rp->napi); iowrite16(RHINE_EVENT & 0xffff, ioaddr + IntrEnable); @@ -1696,7 +1696,10 @@ static int rhine_open(struct net_device *dev) rhine_power_init(dev); rhine_chip_reset(dev); rhine_task_enable(rp); + + netdev_lock(dev); init_registers(dev); + netdev_unlock(dev); netif_dbg(rp, ifup, dev, "%s() Done - status %04x MII status: %04x\n", __func__, ioread16(ioaddr + ChipCmd), @@ -1727,6 +1730,8 @@ static void rhine_reset_task(struct work_struct *work) napi_disable(&rp->napi); netif_tx_disable(dev); + + netdev_lock(dev); spin_lock_bh(&rp->lock); /* clear all descriptors */ @@ -1740,6 +1745,7 @@ static void rhine_reset_task(struct work_struct *work) init_registers(dev); spin_unlock_bh(&rp->lock); + netdev_unlock(dev); netif_trans_update(dev); /* prevent tx timeout */ dev->stats.tx_errors++; @@ -2541,9 +2547,12 @@ static int rhine_resume(struct device *device) alloc_tbufs(dev); rhine_reset_rbufs(rp); rhine_task_enable(rp); + + netdev_lock(dev); spin_lock_bh(&rp->lock); init_registers(dev); spin_unlock_bh(&rp->lock); + netdev_unlock(dev); netif_device_attach(dev); diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 3b23f3d3ca2b..5c80fbee7913 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -74,7 +74,7 @@ static void nsim_get_ringparam(struct net_device *dev, memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX; - if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; } diff --git a/drivers/net/netdevsim/hwstats.c b/drivers/net/netdevsim/hwstats.c index 0e58aa7f0374..66b3215db3ac 100644 --- a/drivers/net/netdevsim/hwstats.c +++ b/drivers/net/netdevsim/hwstats.c @@ -331,7 +331,6 @@ enum nsim_dev_hwstats_do { }; struct nsim_dev_hwstats_fops { - const struct file_operations fops; enum nsim_dev_hwstats_do action; enum netdev_offload_xstats_type type; }; @@ -342,13 +341,12 @@ nsim_dev_hwstats_do_write(struct file *file, size_t count, loff_t *ppos) { struct nsim_dev_hwstats *hwstats = file->private_data; - struct nsim_dev_hwstats_fops *hwsfops; + const struct nsim_dev_hwstats_fops *hwsfops; struct list_head *hwsdev_list; int ifindex; int err; - hwsfops = container_of(debugfs_real_fops(file), - struct nsim_dev_hwstats_fops, fops); + hwsfops = debugfs_get_aux(file); err = kstrtoint_from_user(data, count, 0, &ifindex); if (err) @@ -381,14 +379,13 @@ nsim_dev_hwstats_do_write(struct file *file, return count; } +static struct debugfs_short_fops debugfs_ops = { + .write = nsim_dev_hwstats_do_write, + .llseek = generic_file_llseek, +}; + #define NSIM_DEV_HWSTATS_FOPS(ACTION, TYPE) \ { \ - .fops = { \ - .open = simple_open, \ - .write = nsim_dev_hwstats_do_write, \ - .llseek = generic_file_llseek, \ - .owner = THIS_MODULE, \ - }, \ .action = ACTION, \ .type = TYPE, \ } @@ 
-433,12 +430,12 @@ int nsim_dev_hwstats_init(struct nsim_dev *nsim_dev) goto err_remove_hwstats_recursive; } - debugfs_create_file("enable_ifindex", 0200, hwstats->l3_ddir, hwstats, - &nsim_dev_hwstats_l3_enable_fops.fops); - debugfs_create_file("disable_ifindex", 0200, hwstats->l3_ddir, hwstats, - &nsim_dev_hwstats_l3_disable_fops.fops); - debugfs_create_file("fail_next_enable", 0200, hwstats->l3_ddir, hwstats, - &nsim_dev_hwstats_l3_fail_fops.fops); + debugfs_create_file_aux("enable_ifindex", 0200, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_enable_fops, &debugfs_ops); + debugfs_create_file_aux("disable_ifindex", 0200, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_disable_fops, &debugfs_ops); + debugfs_create_file_aux("fail_next_enable", 0200, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_fail_fops, &debugfs_ops); INIT_DELAYED_WORK(&hwstats->traffic_dw, &nsim_dev_hwstats_traffic_work); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index dcf073bc4802..96d54c08043d 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -134,6 +134,7 @@ struct netdevsim { u32 sleep; u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS]; u32 (*ports)[NSIM_UDP_TUNNEL_N_PORTS]; + struct dentry *ddir; struct debugfs_u32_array dfs_ports[2]; } udp_ports; diff --git a/drivers/net/netdevsim/udp_tunnels.c b/drivers/net/netdevsim/udp_tunnels.c index 02dc3123eb6c..640b4983a9a0 100644 --- a/drivers/net/netdevsim/udp_tunnels.c +++ b/drivers/net/netdevsim/udp_tunnels.c @@ -112,9 +112,11 @@ nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data, struct net_device *dev = file->private_data; struct netdevsim *ns = netdev_priv(dev); - memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); rtnl_lock(); - udp_tunnel_nic_reset_ntf(dev); + if (dev->reg_state == NETREG_REGISTERED) { + memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); + udp_tunnel_nic_reset_ntf(dev); + } rtnl_unlock(); return count; @@ -144,23 +146,23 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, else ns->udp_ports.ports = nsim_dev->udp_ports.__ports; - debugfs_create_u32("udp_ports_inject_error", 0600, - ns->nsim_dev_port->ddir, + ns->udp_ports.ddir = debugfs_create_dir("udp_ports", + ns->nsim_dev_port->ddir); + + debugfs_create_u32("inject_error", 0600, ns->udp_ports.ddir, &ns->udp_ports.inject_error); ns->udp_ports.dfs_ports[0].array = ns->udp_ports.ports[0]; ns->udp_ports.dfs_ports[0].n_elements = NSIM_UDP_TUNNEL_N_PORTS; - debugfs_create_u32_array("udp_ports_table0", 0400, - ns->nsim_dev_port->ddir, + debugfs_create_u32_array("table0", 0400, ns->udp_ports.ddir, &ns->udp_ports.dfs_ports[0]); ns->udp_ports.dfs_ports[1].array = ns->udp_ports.ports[1]; ns->udp_ports.dfs_ports[1].n_elements = NSIM_UDP_TUNNEL_N_PORTS; - debugfs_create_u32_array("udp_ports_table1", 0400, - ns->nsim_dev_port->ddir, + debugfs_create_u32_array("table1", 0400, ns->udp_ports.ddir, &ns->udp_ports.dfs_ports[1]); - debugfs_create_file("udp_ports_reset", 0200, ns->nsim_dev_port->ddir, + debugfs_create_file("reset", 0200, ns->udp_ports.ddir, dev, &nsim_udp_tunnels_info_reset_fops); /* Note: it's not normal to allocate the info struct like this! 
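The netdevsim hwstats conversion above, like the carl9170 and b43/b43legacy conversions further down, replaces a per-entry struct file_operations (recovered via container_of(debugfs_real_fops(file), ...)) with one shared struct debugfs_short_fops plus per-file aux data. A minimal sketch of the pattern follows, using a hypothetical demo module rather than any of these drivers:

/* Hedged sketch, not part of the patch: the debugfs "aux" idiom used by
 * these conversions. Each file carries a driver-private descriptor as aux
 * data, and one shared short fops serves all of them.
 */
#include <linux/debugfs.h>
#include <linux/module.h>

struct demo_cmd {			/* hypothetical per-file descriptor */
	const char *tag;
	int action;
};

static struct demo_cmd demo_enable = { .tag = "enable", .action = 1 };
static struct demo_cmd demo_disable = { .tag = "disable", .action = 0 };

static ssize_t demo_write(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	const struct demo_cmd *cmd = debugfs_get_aux(file);	/* per-file aux data */

	pr_info("demo: %s (action=%d), %zu bytes\n", cmd->tag, cmd->action, count);
	return count;
}

static struct debugfs_short_fops demo_fops = {	/* shared; no .open/.owner needed */
	.write = demo_write,
	.llseek = generic_file_llseek,
};

static struct dentry *demo_dir;

static int __init demo_init(void)
{
	demo_dir = debugfs_create_dir("aux_demo", NULL);
	debugfs_create_file_aux("enable", 0200, demo_dir, NULL,
				&demo_enable, &demo_fops);
	debugfs_create_file_aux("disable", 0200, demo_dir, NULL,
				&demo_disable, &demo_fops);
	return 0;
}

static void __exit demo_exit(void)
{
	debugfs_remove_recursive(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Because the callbacks live in a single shared short fops and debugfs proxies the calls, the per-entry descriptors no longer need their own open/owner boilerplate, which is what lets the drivers above mark them const.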
@@ -196,6 +198,9 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, void nsim_udp_tunnels_info_destroy(struct net_device *dev) { + struct netdevsim *ns = netdev_priv(dev); + + debugfs_remove_recursive(ns->udp_ports.ddir); kfree(dev->udp_tunnel_nic_info); dev->udp_tunnel_nic_info = NULL; } diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c index 4494b3e39ce2..a3996471a1c9 100644 --- a/drivers/net/phy/marvell-88q2xxx.c +++ b/drivers/net/phy/marvell-88q2xxx.c @@ -95,6 +95,10 @@ #define MDIO_MMD_PCS_MV_TDR_OFF_CUTOFF 65246 +struct mv88q2xxx_priv { + bool enable_temp; +}; + struct mmd_val { int devad; u32 regnum; @@ -710,17 +714,12 @@ static const struct hwmon_chip_info mv88q2xxx_hwmon_chip_info = { static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) { + struct mv88q2xxx_priv *priv = phydev->priv; struct device *dev = &phydev->mdio.dev; struct device *hwmon; char *hwmon_name; - int ret; - - /* Enable temperature sense */ - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_TEMP_SENSOR2, - MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0); - if (ret < 0) - return ret; + priv->enable_temp = true; hwmon_name = devm_hwmon_sanitize_name(dev, dev_name(dev)); if (IS_ERR(hwmon_name)) return PTR_ERR(hwmon_name); @@ -743,6 +742,14 @@ static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) static int mv88q2xxx_probe(struct phy_device *phydev) { + struct mv88q2xxx_priv *priv; + + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + phydev->priv = priv; + return mv88q2xxx_hwmon_probe(phydev); } @@ -810,6 +817,18 @@ static int mv88q222x_revb1_revb2_config_init(struct phy_device *phydev) static int mv88q222x_config_init(struct phy_device *phydev) { + struct mv88q2xxx_priv *priv = phydev->priv; + int ret; + + /* Enable temperature sense */ + if (priv->enable_temp) { + ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_TEMP_SENSOR2, + MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0); + if (ret < 0) + return ret; + } + if (phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] == PHY_ID_88Q2220_REVB0) return mv88q222x_revb0_config_init(phydev); else diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 323717a4821f..34231b5b9175 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1297,6 +1297,8 @@ static int nxp_c45_soft_reset(struct phy_device *phydev) if (ret) return ret; + usleep_range(2000, 2050); + return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, VEND1_DEVICE_CONTROL, ret, !(ret & DEVICE_CONTROL_RESET), 20000, diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 46afb95ffabe..a19789b57190 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -61,7 +61,18 @@ #define IPHETH_USBINTF_PROTO 1 #define IPHETH_IP_ALIGN 2 /* padding at front of URB */ -#define IPHETH_NCM_HEADER_SIZE (12 + 96) /* NCMH + NCM0 */ +/* On iOS devices, NCM headers in RX have a fixed size regardless of DPE count: + * - NTH16 (NCMH): 12 bytes, as per CDC NCM 1.0 spec + * - NDP16 (NCM0): 96 bytes, of which + * - NDP16 fixed header: 8 bytes + * - maximum of 22 DPEs (21 datagrams + trailer), 4 bytes each + */ +#define IPHETH_NDP16_MAX_DPE 22 +#define IPHETH_NDP16_HEADER_SIZE (sizeof(struct usb_cdc_ncm_ndp16) + \ + IPHETH_NDP16_MAX_DPE * \ + sizeof(struct usb_cdc_ncm_dpe16)) +#define IPHETH_NCM_HEADER_SIZE (sizeof(struct usb_cdc_ncm_nth16) + \ + IPHETH_NDP16_HEADER_SIZE) #define IPHETH_TX_BUF_SIZE ETH_FRAME_LEN #define 
IPHETH_RX_BUF_SIZE_LEGACY (IPHETH_IP_ALIGN + ETH_FRAME_LEN) #define IPHETH_RX_BUF_SIZE_NCM 65536 @@ -207,15 +218,23 @@ static int ipheth_rcvbulk_callback_legacy(struct urb *urb) return ipheth_consume_skb(buf, len, dev); } +/* In "NCM mode", the iOS device encapsulates RX (phone->computer) traffic + * in NCM Transfer Blocks (similarly to CDC NCM). However, unlike reverse + * tethering (handled by the `cdc_ncm` driver), regular tethering is not + * compliant with the CDC NCM spec, as the device is missing the necessary + * descriptors, and TX (computer->phone) traffic is not encapsulated + * at all. Thus `ipheth` implements a very limited subset of the spec with + * the sole purpose of parsing RX URBs. + */ static int ipheth_rcvbulk_callback_ncm(struct urb *urb) { struct usb_cdc_ncm_nth16 *ncmh; struct usb_cdc_ncm_ndp16 *ncm0; struct usb_cdc_ncm_dpe16 *dpe; struct ipheth_device *dev; + u16 dg_idx, dg_len; int retval = -EINVAL; char *buf; - int len; dev = urb->context; @@ -226,40 +245,42 @@ static int ipheth_rcvbulk_callback_ncm(struct urb *urb) ncmh = urb->transfer_buffer; if (ncmh->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN) || - le16_to_cpu(ncmh->wNdpIndex) >= urb->actual_length) { - dev->net->stats.rx_errors++; - return retval; - } + /* On iOS, NDP16 directly follows NTH16 */ + ncmh->wNdpIndex != cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16))) + goto rx_error; - ncm0 = urb->transfer_buffer + le16_to_cpu(ncmh->wNdpIndex); - if (ncm0->dwSignature != cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN) || - le16_to_cpu(ncmh->wHeaderLength) + le16_to_cpu(ncm0->wLength) >= - urb->actual_length) { - dev->net->stats.rx_errors++; - return retval; - } + ncm0 = urb->transfer_buffer + sizeof(struct usb_cdc_ncm_nth16); + if (ncm0->dwSignature != cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN)) + goto rx_error; dpe = ncm0->dpe16; - while (le16_to_cpu(dpe->wDatagramIndex) != 0 && - le16_to_cpu(dpe->wDatagramLength) != 0) { - if (le16_to_cpu(dpe->wDatagramIndex) >= urb->actual_length || - le16_to_cpu(dpe->wDatagramIndex) + - le16_to_cpu(dpe->wDatagramLength) > urb->actual_length) { + for (int dpe_i = 0; dpe_i < IPHETH_NDP16_MAX_DPE; ++dpe_i, ++dpe) { + dg_idx = le16_to_cpu(dpe->wDatagramIndex); + dg_len = le16_to_cpu(dpe->wDatagramLength); + + /* Null DPE must be present after last datagram pointer entry + * (3.3.1 USB CDC NCM spec v1.0) + */ + if (dg_idx == 0 && dg_len == 0) + return 0; + + if (dg_idx < IPHETH_NCM_HEADER_SIZE || + dg_idx >= urb->actual_length || + dg_len > urb->actual_length - dg_idx) { dev->net->stats.rx_length_errors++; return retval; } - buf = urb->transfer_buffer + le16_to_cpu(dpe->wDatagramIndex); - len = le16_to_cpu(dpe->wDatagramLength); + buf = urb->transfer_buffer + dg_idx; - retval = ipheth_consume_skb(buf, len, dev); + retval = ipheth_consume_skb(buf, dg_len, dev); if (retval != 0) return retval; - - dpe++; } - return 0; +rx_error: + dev->net->stats.rx_errors++; + return retval; } static void ipheth_rcvbulk_callback(struct urb *urb) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 01a3b2417a54..ddff6f19ff98 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -71,6 +71,14 @@ #define MSR_SPEED (1<<3) #define MSR_LINK (1<<2) +/* USB endpoints */ +enum rtl8150_usb_ep { + RTL8150_USB_EP_CONTROL = 0, + RTL8150_USB_EP_BULK_IN = 1, + RTL8150_USB_EP_BULK_OUT = 2, + RTL8150_USB_EP_INT_IN = 3, +}; + /* Interrupt pipe data */ #define INT_TSR 0x00 #define INT_RSR 0x01 @@ -867,6 +875,13 @@ static int rtl8150_probe(struct usb_interface *intf, struct 
usb_device *udev = interface_to_usbdev(intf); rtl8150_t *dev; struct net_device *netdev; + static const u8 bulk_ep_addr[] = { + RTL8150_USB_EP_BULK_IN | USB_DIR_IN, + RTL8150_USB_EP_BULK_OUT | USB_DIR_OUT, + 0}; + static const u8 int_ep_addr[] = { + RTL8150_USB_EP_INT_IN | USB_DIR_IN, + 0}; netdev = alloc_etherdev(sizeof(rtl8150_t)); if (!netdev) @@ -880,6 +895,13 @@ static int rtl8150_probe(struct usb_interface *intf, return -ENOMEM; } + /* Verify that all required endpoints are present */ + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || + !usb_check_int_endpoints(intf, int_ep_addr)) { + dev_err(&intf->dev, "couldn't find required endpoints\n"); + goto out; + } + tasklet_setup(&dev->tl, rx_fixup); spin_lock_init(&dev->rx_pool_lock); diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index d2023e7131bd..6e6e9f05509a 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -411,6 +411,11 @@ static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb struct tunnel_msg *tmsg; struct net_device *dev; + if (cb->nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct tunnel_msg))) { + NL_SET_ERR_MSG(cb->extack, "Invalid msg length"); + return -EINVAL; + } + tmsg = nlmsg_data(cb->nlh); if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) { diff --git a/drivers/net/wireless/ath/carl9170/debug.c b/drivers/net/wireless/ath/carl9170/debug.c index bb40889d7c72..2d734567000a 100644 --- a/drivers/net/wireless/ath/carl9170/debug.c +++ b/drivers/net/wireless/ath/carl9170/debug.c @@ -54,7 +54,6 @@ struct carl9170_debugfs_fops { char *(*read)(struct ar9170 *ar, char *buf, size_t bufsize, ssize_t *len); ssize_t (*write)(struct ar9170 *aru, const char *buf, size_t size); - const struct file_operations fops; enum carl9170_device_state req_dev_state; }; @@ -62,7 +61,7 @@ struct carl9170_debugfs_fops { static ssize_t carl9170_debugfs_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - struct carl9170_debugfs_fops *dfops; + const struct carl9170_debugfs_fops *dfops; struct ar9170 *ar; char *buf = NULL, *res_buf = NULL; ssize_t ret = 0; @@ -75,8 +74,7 @@ static ssize_t carl9170_debugfs_read(struct file *file, char __user *userbuf, if (!ar) return -ENODEV; - dfops = container_of(debugfs_real_fops(file), - struct carl9170_debugfs_fops, fops); + dfops = debugfs_get_aux(file); if (!dfops->read) return -ENOSYS; @@ -113,7 +111,7 @@ out_free: static ssize_t carl9170_debugfs_write(struct file *file, const char __user *userbuf, size_t count, loff_t *ppos) { - struct carl9170_debugfs_fops *dfops; + const struct carl9170_debugfs_fops *dfops; struct ar9170 *ar; char *buf = NULL; int err = 0; @@ -128,8 +126,7 @@ static ssize_t carl9170_debugfs_write(struct file *file, if (!ar) return -ENODEV; - dfops = container_of(debugfs_real_fops(file), - struct carl9170_debugfs_fops, fops); + dfops = debugfs_get_aux(file); if (!dfops->write) return -ENOSYS; @@ -165,6 +162,11 @@ out_free: return err; } +static struct debugfs_short_fops debugfs_fops = { + .read = carl9170_debugfs_read, + .write = carl9170_debugfs_write, +}; + #define __DEBUGFS_DECLARE_FILE(name, _read, _write, _read_bufsize, \ _attr, _dstate) \ static const struct carl9170_debugfs_fops carl_debugfs_##name ##_ops = {\ @@ -173,12 +175,6 @@ static const struct carl9170_debugfs_fops carl_debugfs_##name ##_ops = {\ .write = _write, \ .attr = _attr, \ .req_dev_state = _dstate, \ - .fops = { \ - .open = simple_open, \ - .read = carl9170_debugfs_read, \ - .write = 
carl9170_debugfs_write, \ - .owner = THIS_MODULE \ - }, \ } #define DEBUGFS_DECLARE_FILE(name, _read, _write, _read_bufsize, _attr) \ @@ -816,9 +812,9 @@ void carl9170_debugfs_register(struct ar9170 *ar) ar->hw->wiphy->debugfsdir); #define DEBUGFS_ADD(name) \ - debugfs_create_file(#name, carl_debugfs_##name ##_ops.attr, \ - ar->debug_dir, ar, \ - &carl_debugfs_##name ## _ops.fops) + debugfs_create_file_aux(#name, carl_debugfs_##name ##_ops.attr, \ + ar->debug_dir, ar, &carl_debugfs_##name ## _ops, \ + &debugfs_fops) DEBUGFS_ADD(usb_tx_anch_urbs); DEBUGFS_ADD(usb_rx_pool_urbs); diff --git a/drivers/net/wireless/broadcom/b43/debugfs.c b/drivers/net/wireless/broadcom/b43/debugfs.c index efa98444e3fb..5a49970afc8c 100644 --- a/drivers/net/wireless/broadcom/b43/debugfs.c +++ b/drivers/net/wireless/broadcom/b43/debugfs.c @@ -30,7 +30,6 @@ static struct dentry *rootdir; struct b43_debugfs_fops { ssize_t (*read)(struct b43_wldev *dev, char *buf, size_t bufsize); int (*write)(struct b43_wldev *dev, const char *buf, size_t count); - struct file_operations fops; /* Offset of struct b43_dfs_file in struct b43_dfsentry */ size_t file_struct_offset; }; @@ -491,7 +490,7 @@ static ssize_t b43_debugfs_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { struct b43_wldev *dev; - struct b43_debugfs_fops *dfops; + const struct b43_debugfs_fops *dfops; struct b43_dfs_file *dfile; ssize_t ret; char *buf; @@ -511,8 +510,7 @@ static ssize_t b43_debugfs_read(struct file *file, char __user *userbuf, goto out_unlock; } - dfops = container_of(debugfs_real_fops(file), - struct b43_debugfs_fops, fops); + dfops = debugfs_get_aux(file); if (!dfops->read) { err = -ENOSYS; goto out_unlock; @@ -555,7 +553,7 @@ static ssize_t b43_debugfs_write(struct file *file, size_t count, loff_t *ppos) { struct b43_wldev *dev; - struct b43_debugfs_fops *dfops; + const struct b43_debugfs_fops *dfops; char *buf; int err = 0; @@ -573,8 +571,7 @@ static ssize_t b43_debugfs_write(struct file *file, goto out_unlock; } - dfops = container_of(debugfs_real_fops(file), - struct b43_debugfs_fops, fops); + dfops = debugfs_get_aux(file); if (!dfops->write) { err = -ENOSYS; goto out_unlock; @@ -602,16 +599,16 @@ out_unlock: } +static struct debugfs_short_fops debugfs_ops = { + .read = b43_debugfs_read, + .write = b43_debugfs_write, + .llseek = generic_file_llseek, +}; + #define B43_DEBUGFS_FOPS(name, _read, _write) \ static struct b43_debugfs_fops fops_##name = { \ .read = _read, \ .write = _write, \ - .fops = { \ - .open = simple_open, \ - .read = b43_debugfs_read, \ - .write = b43_debugfs_write, \ - .llseek = generic_file_llseek, \ - }, \ .file_struct_offset = offsetof(struct b43_dfsentry, \ file_##name), \ } @@ -703,9 +700,9 @@ void b43_debugfs_add_device(struct b43_wldev *dev) #define ADD_FILE(name, mode) \ do { \ - debugfs_create_file(__stringify(name), \ + debugfs_create_file_aux(__stringify(name), \ mode, e->subdir, dev, \ - &fops_##name.fops); \ + &fops_##name, &debugfs_ops); \ } while (0) diff --git a/drivers/net/wireless/broadcom/b43legacy/debugfs.c b/drivers/net/wireless/broadcom/b43legacy/debugfs.c index 6b0e8d117061..5d04bcc216e5 100644 --- a/drivers/net/wireless/broadcom/b43legacy/debugfs.c +++ b/drivers/net/wireless/broadcom/b43legacy/debugfs.c @@ -31,7 +31,6 @@ static struct dentry *rootdir; struct b43legacy_debugfs_fops { ssize_t (*read)(struct b43legacy_wldev *dev, char *buf, size_t bufsize); int (*write)(struct b43legacy_wldev *dev, const char *buf, size_t count); - struct file_operations fops; /* Offset of 
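The carl9170 and b43 conversions above, and the b43legacy one that follows, all make the same change: the per-file struct file_operations embedded in each driver's own ops descriptor is dropped in favour of one shared struct debugfs_short_fops, with the driver descriptor attached as auxiliary data when the file is created and fetched back with debugfs_get_aux() in the handlers. A minimal sketch of that pattern with hypothetical names (struct my_aux, my_counters_aux, parent, drvdata are assumptions, not identifiers from these drivers):

/* One shared fops table; the per-file descriptor comes from the aux
 * pointer rather than from container_of() on debugfs_real_fops().
 */
struct my_aux {
	ssize_t (*show)(char *buf, size_t size);
};

static ssize_t my_read(struct file *file, char __user *ubuf,
		       size_t count, loff_t *ppos)
{
	const struct my_aux *aux = debugfs_get_aux(file);
	char buf[128];
	ssize_t len;

	if (!aux->show)
		return -ENOSYS;
	len = aux->show(buf, sizeof(buf));
	if (len < 0)
		return len;
	return simple_read_from_buffer(ubuf, count, ppos, buf, len);
}

static struct debugfs_short_fops my_debugfs_fops = {
	.read = my_read,
	.llseek = generic_file_llseek,
};

/* at registration time, one call per file: */
debugfs_create_file_aux("counters", 0400, parent, drvdata,
			&my_counters_aux, &my_debugfs_fops);

The mtk-tphy change later in this series uses the numeric flavour of the same interface, debugfs_create_file_aux_num() with debugfs_get_aux_num(), to replace filename matching with an index.
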
struct b43legacy_dfs_file in struct b43legacy_dfsentry */ size_t file_struct_offset; /* Take wl->irq_lock before calling read/write? */ @@ -188,7 +187,7 @@ static ssize_t b43legacy_debugfs_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { struct b43legacy_wldev *dev; - struct b43legacy_debugfs_fops *dfops; + const struct b43legacy_debugfs_fops *dfops; struct b43legacy_dfs_file *dfile; ssize_t ret; char *buf; @@ -208,8 +207,7 @@ static ssize_t b43legacy_debugfs_read(struct file *file, char __user *userbuf, goto out_unlock; } - dfops = container_of(debugfs_real_fops(file), - struct b43legacy_debugfs_fops, fops); + dfops = debugfs_get_aux(file); if (!dfops->read) { err = -ENOSYS; goto out_unlock; @@ -257,7 +255,7 @@ static ssize_t b43legacy_debugfs_write(struct file *file, size_t count, loff_t *ppos) { struct b43legacy_wldev *dev; - struct b43legacy_debugfs_fops *dfops; + const struct b43legacy_debugfs_fops *dfops; char *buf; int err = 0; @@ -275,8 +273,7 @@ static ssize_t b43legacy_debugfs_write(struct file *file, goto out_unlock; } - dfops = container_of(debugfs_real_fops(file), - struct b43legacy_debugfs_fops, fops); + dfops = debugfs_get_aux(file); if (!dfops->write) { err = -ENOSYS; goto out_unlock; @@ -308,17 +305,16 @@ out_unlock: return err ? err : count; } +static struct debugfs_short_fops debugfs_ops = { + .read = b43legacy_debugfs_read, + .write = b43legacy_debugfs_write, + .llseek = generic_file_llseek +}; #define B43legacy_DEBUGFS_FOPS(name, _read, _write, _take_irqlock) \ static struct b43legacy_debugfs_fops fops_##name = { \ .read = _read, \ .write = _write, \ - .fops = { \ - .open = simple_open, \ - .read = b43legacy_debugfs_read, \ - .write = b43legacy_debugfs_write, \ - .llseek = generic_file_llseek, \ - }, \ .file_struct_offset = offsetof(struct b43legacy_dfsentry, \ file_##name), \ .take_irqlock = _take_irqlock, \ @@ -386,9 +382,9 @@ void b43legacy_debugfs_add_device(struct b43legacy_wldev *dev) #define ADD_FILE(name, mode) \ do { \ - debugfs_create_file(__stringify(name), mode, \ + debugfs_create_file_aux(__stringify(name), mode, \ e->subdir, dev, \ - &fops_##name.fops); \ + &fops_##name, &debugfs_ops); \ } while (0) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c index a259f4dd9540..413973d05b43 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c @@ -1479,14 +1479,13 @@ static void mt7603_mac_watchdog_reset(struct mt7603_dev *dev) tasklet_enable(&dev->mt76.pre_tbtt_tasklet); mt7603_beacon_set_timer(dev, -1, beacon_int); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); - napi_schedule(&dev->mt76.tx_napi); - napi_enable(&dev->mt76.napi[0]); - napi_schedule(&dev->mt76.napi[0]); - napi_enable(&dev->mt76.napi[1]); + + local_bh_disable(); + napi_schedule(&dev->mt76.tx_napi); + napi_schedule(&dev->mt76.napi[0]); napi_schedule(&dev->mt76.napi[1]); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c index 9a278589df4e..68010e27f065 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c @@ -164,12 +164,16 @@ static int mt7615_pci_resume(struct pci_dev *pdev) dev_err(mdev->dev, "PDMA engine must be reinitialized\n"); mt76_worker_enable(&mdev->tx_worker); - local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { napi_enable(&mdev->napi[i]); - napi_schedule(&mdev->napi[i]); } 
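The mt76 hunks here and in the files that follow all apply one pattern: napi_enable() is hoisted out of the local_bh_disable()/local_bh_enable() section, apparently because napi_enable() may sleep and so must not run with bottom halves disabled, while napi_schedule() stays inside a short BH-disabled window so the raised softirq is processed immediately. A condensed sketch of the resulting shape, with an invented device structure:

/* "struct my_dev", "n_rxq" and the napi members are illustrative; the
 * ordering is the point: enable everything first, then schedule the poll
 * loops under a single BH-disabled section.
 */
static void my_dev_restart_napi(struct my_dev *dev)
{
	int i;

	for (i = 0; i < dev->n_rxq; i++)
		napi_enable(&dev->rx_napi[i]);
	napi_enable(&dev->tx_napi);

	local_bh_disable();
	for (i = 0; i < dev->n_rxq; i++)
		napi_schedule(&dev->rx_napi[i]);
	napi_schedule(&dev->tx_napi);
	local_bh_enable();
}
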
napi_enable(&mdev->tx_napi); + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + napi_schedule(&mdev->napi[i]); + } napi_schedule(&mdev->tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c index a0ca3bbdfcaf..c2e4e6aabd9f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c @@ -262,12 +262,14 @@ void mt7615_mac_reset_work(struct work_struct *work) mt76_worker_enable(&dev->mt76.tx_worker); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); - napi_schedule(&dev->mt76.tx_napi); - mt76_for_each_q_rx(&dev->mt76, i) { napi_enable(&dev->mt76.napi[i]); + } + + local_bh_disable(); + napi_schedule(&dev->mt76.tx_napi); + mt76_for_each_q_rx(&dev->mt76, i) { napi_schedule(&dev->mt76.napi[i]); } local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index 1eb955f3ca13..b456ccd00d58 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -282,14 +282,16 @@ static int mt76x0e_resume(struct pci_dev *pdev) mt76_worker_enable(&mdev->tx_worker); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { mt76_queue_rx_reset(dev, i); napi_enable(&mdev->napi[i]); - napi_schedule(&mdev->napi[i]); } - napi_enable(&mdev->tx_napi); + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + napi_schedule(&mdev->napi[i]); + } napi_schedule(&mdev->tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c index 7d840ad4ae65..a82c75ba26e6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c @@ -504,12 +504,14 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) mt76_worker_enable(&dev->mt76.tx_worker); tasklet_enable(&dev->mt76.pre_tbtt_tasklet); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); - napi_schedule(&dev->mt76.tx_napi); - mt76_for_each_q_rx(&dev->mt76, i) { napi_enable(&dev->mt76.napi[i]); + } + + local_bh_disable(); + napi_schedule(&dev->mt76.tx_napi); + mt76_for_each_q_rx(&dev->mt76, i) { napi_schedule(&dev->mt76.napi[i]); } local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c index 67c9d1caa0bd..727bfdd00b40 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c @@ -151,12 +151,15 @@ mt76x2e_resume(struct pci_dev *pdev) mt76_worker_enable(&mdev->tx_worker); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { napi_enable(&mdev->napi[i]); - napi_schedule(&mdev->napi[i]); } napi_enable(&mdev->tx_napi); + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + napi_schedule(&mdev->napi[i]); + } napi_schedule(&mdev->tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 13bdc0a7174c..2ba6eb3038ce 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -1356,10 +1356,15 @@ mt7915_mac_restart(struct mt7915_dev *dev) mt7915_dma_reset(dev, true); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { if (mdev->q_rx[i].ndesc) { napi_enable(&dev->mt76.napi[i]); + } + } + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + if (mdev->q_rx[i].ndesc) { 
napi_schedule(&dev->mt76.napi[i]); } } @@ -1419,8 +1424,9 @@ out: if (phy2) clear_bit(MT76_RESET, &phy2->mt76->state); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); + + local_bh_disable(); napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); @@ -1570,9 +1576,12 @@ void mt7915_mac_reset_work(struct work_struct *work) if (phy2) clear_bit(MT76_RESET, &phy2->mt76->state); - local_bh_disable(); mt76_for_each_q_rx(&dev->mt76, i) { napi_enable(&dev->mt76.napi[i]); + } + + local_bh_disable(); + mt76_for_each_q_rx(&dev->mt76, i) { napi_schedule(&dev->mt76.napi[i]); } local_bh_enable(); @@ -1581,8 +1590,8 @@ void mt7915_mac_reset_work(struct work_struct *work) mt76_worker_enable(&dev->mt76.tx_worker); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); + local_bh_disable(); napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index ba870e1b05fb..a0c9df3c2cc7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -523,12 +523,15 @@ static int mt7921_pci_resume(struct device *device) mt76_worker_enable(&mdev->tx_worker); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { napi_enable(&mdev->napi[i]); - napi_schedule(&mdev->napi[i]); } napi_enable(&mdev->tx_napi); + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + napi_schedule(&mdev->napi[i]); + } napi_schedule(&mdev->tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c index 2452b1a2d118..881812ba03ff 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c @@ -81,9 +81,12 @@ int mt7921e_mac_reset(struct mt792x_dev *dev) mt792x_wpdma_reset(dev, true); - local_bh_disable(); mt76_for_each_q_rx(&dev->mt76, i) { napi_enable(&dev->mt76.napi[i]); + } + + local_bh_disable(); + mt76_for_each_q_rx(&dev->mt76, i) { napi_schedule(&dev->mt76.napi[i]); } local_bh_enable(); @@ -115,8 +118,8 @@ int mt7921e_mac_reset(struct mt792x_dev *dev) err = __mt7921_start(&dev->phy); out: - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); + local_bh_disable(); napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c index f36893e20c61..c7b5dc1dbb34 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c @@ -556,12 +556,15 @@ static int mt7925_pci_resume(struct device *device) mt76_worker_enable(&mdev->tx_worker); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { napi_enable(&mdev->napi[i]); - napi_schedule(&mdev->napi[i]); } napi_enable(&mdev->tx_napi); + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + napi_schedule(&mdev->napi[i]); + } napi_schedule(&mdev->tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c index faedbf766d1a..4578d16bf456 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c @@ -101,12 +101,15 @@ int mt7925e_mac_reset(struct mt792x_dev *dev) mt792x_wpdma_reset(dev, true); - local_bh_disable(); mt76_for_each_q_rx(&dev->mt76, i) { napi_enable(&dev->mt76.napi[i]); - napi_schedule(&dev->mt76.napi[i]); } napi_enable(&dev->mt76.tx_napi); + + local_bh_disable(); + 
mt76_for_each_q_rx(&dev->mt76, i) { + napi_schedule(&dev->mt76.napi[i]); + } napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index bc8cba4dca47..019c925ae600 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -1695,7 +1695,6 @@ mt7996_mac_restart(struct mt7996_dev *dev) mt7996_dma_reset(dev, true); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { if (mtk_wed_device_active(&dev->mt76.mmio.wed) && mt76_queue_is_wed_rro(&mdev->q_rx[i])) @@ -1703,10 +1702,11 @@ mt7996_mac_restart(struct mt7996_dev *dev) if (mdev->q_rx[i].ndesc) { napi_enable(&dev->mt76.napi[i]); + local_bh_disable(); napi_schedule(&dev->mt76.napi[i]); + local_bh_enable(); } } - local_bh_enable(); clear_bit(MT76_MCU_RESET, &dev->mphy.state); clear_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state); @@ -1764,8 +1764,8 @@ out: if (phy3) clear_bit(MT76_RESET, &phy3->mt76->state); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); + local_bh_disable(); napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); @@ -1958,23 +1958,23 @@ void mt7996_mac_reset_work(struct work_struct *work) if (phy3) clear_bit(MT76_RESET, &phy3->mt76->state); - local_bh_disable(); mt76_for_each_q_rx(&dev->mt76, i) { if (mtk_wed_device_active(&dev->mt76.mmio.wed) && mt76_queue_is_wed_rro(&dev->mt76.q_rx[i])) continue; napi_enable(&dev->mt76.napi[i]); + local_bh_disable(); napi_schedule(&dev->mt76.napi[i]); + local_bh_enable(); } - local_bh_enable(); tasklet_schedule(&dev->mt76.irq_tasklet); mt76_worker_enable(&dev->mt76.tx_worker); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); + local_bh_disable(); napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 2237715e42eb..0ccf4a9e523a 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -1212,7 +1212,7 @@ enum nd_ioctl_mode { DIMM_IOCTL, }; -static int match_dimm(struct device *dev, void *data) +static int match_dimm(struct device *dev, const void *data) { long id = (long) data; diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 030dbde6b088..9e84ab411564 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -67,13 +67,6 @@ bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, return claimed; } -static int namespace_match(struct device *dev, void *data) -{ - char *name = data; - - return strcmp(name, dev_name(dev)) == 0; -} - static bool is_idle(struct device *dev, struct nd_namespace_common *ndns) { struct nd_region *nd_region = to_nd_region(dev->parent); @@ -168,7 +161,7 @@ ssize_t nd_namespace_store(struct device *dev, goto out; } - found = device_find_child(dev->parent, name, namespace_match); + found = device_find_child_by_name(dev->parent, name); if (!found) { dev_dbg(dev, "'%s' not found under %s\n", name, dev_name(dev->parent)); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 76b615d4d5b9..40046770f1bf 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2132,15 +2132,16 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns, struct nvme_ns_info *info) { struct queue_limits lim; + unsigned int memflags; int ret; lim = queue_limits_start_update(ns->disk->queue); nvme_set_ctrl_limits(ns->ctrl, &lim); - blk_mq_freeze_queue(ns->disk->queue); + memflags = blk_mq_freeze_queue(ns->disk->queue); ret = queue_limits_commit_update(ns->disk->queue, &lim); 
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); - blk_mq_unfreeze_queue(ns->disk->queue); + blk_mq_unfreeze_queue(ns->disk->queue, memflags); /* Hide the block-interface for these devices */ if (!ret) @@ -2155,6 +2156,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, struct nvme_id_ns_nvm *nvm = NULL; struct nvme_zone_info zi = {}; struct nvme_id_ns *id; + unsigned int memflags; sector_t capacity; unsigned lbaf; int ret; @@ -2186,7 +2188,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, lim = queue_limits_start_update(ns->disk->queue); - blk_mq_freeze_queue(ns->disk->queue); + memflags = blk_mq_freeze_queue(ns->disk->queue); ns->head->lba_shift = id->lbaf[lbaf].ds; ns->head->nuse = le64_to_cpu(id->nuse); capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); @@ -2219,7 +2221,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ret = queue_limits_commit_update(ns->disk->queue, &lim); if (ret) { - blk_mq_unfreeze_queue(ns->disk->queue); + blk_mq_unfreeze_queue(ns->disk->queue, memflags); goto out; } @@ -2235,7 +2237,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ns->head->features |= NVME_NS_DEAC; set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); set_bit(NVME_NS_READY, &ns->flags); - blk_mq_unfreeze_queue(ns->disk->queue); + blk_mq_unfreeze_queue(ns->disk->queue, memflags); if (blk_queue_is_zoned(ns->queue)) { ret = blk_revalidate_disk_zones(ns->disk); @@ -2291,9 +2293,10 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) if (!ret && nvme_ns_head_multipath(ns->head)) { struct queue_limits *ns_lim = &ns->disk->queue->limits; struct queue_limits lim; + unsigned int memflags; lim = queue_limits_start_update(ns->head->disk->queue); - blk_mq_freeze_queue(ns->head->disk->queue); + memflags = blk_mq_freeze_queue(ns->head->disk->queue); /* * queue_limits mixes values that are the hardware limitations * for bio splitting with what is the device configuration. 
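Across the nvme hunks above, blk_mq_freeze_queue() now returns a cookie (the saved memory-allocation flags) that has to be passed back to blk_mq_unfreeze_queue(); where freeze and unfreeze happen in different call chains, as in the multipath code in the next hunk, blk_mq_unfreeze_queue_nomemrestore() is used instead. A minimal sketch of the updated calling convention, assuming "disk" is a struct gendisk and "lim" came from queue_limits_start_update() as in the surrounding code:

	unsigned int memflags;
	int ret;

	memflags = blk_mq_freeze_queue(disk->queue);
	ret = queue_limits_commit_update(disk->queue, &lim);
	blk_mq_unfreeze_queue(disk->queue, memflags);
	if (ret)
		return ret;
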
@@ -2325,7 +2328,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); nvme_mpath_revalidate_paths(ns); - blk_mq_unfreeze_queue(ns->head->disk->queue); + blk_mq_unfreeze_queue(ns->head->disk->queue, memflags); } return ret; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a85d190942bd..2a7635565083 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -60,7 +60,7 @@ void nvme_mpath_unfreeze(struct nvme_subsystem *subsys) lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) if (h->disk) - blk_mq_unfreeze_queue(h->disk->queue); + blk_mq_unfreeze_queue_nomemrestore(h->disk->queue); } void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys) diff --git a/drivers/of/unittest-data/tests-platform.dtsi b/drivers/of/unittest-data/tests-platform.dtsi index cd310b26b50c..4171f43cf01c 100644 --- a/drivers/of/unittest-data/tests-platform.dtsi +++ b/drivers/of/unittest-data/tests-platform.dtsi @@ -33,6 +33,11 @@ reg = <0x100>; }; }; + + test-device@2 { + compatible = "test,rust-device"; + reg = <0x2>; + }; }; platform-tests-2 { diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 105de7c3274a..8fc6238b1728 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -217,7 +217,7 @@ static void opp_migrate_dentry(struct opp_device *opp_dev, { struct opp_device *new_dev = NULL, *iter; const struct device *dev; - struct dentry *dentry; + int err; /* Look for next opp-dev */ list_for_each_entry(iter, &opp_table->dev_list, node) @@ -234,16 +234,14 @@ static void opp_migrate_dentry(struct opp_device *opp_dev, opp_set_dev_name(dev, opp_table->dentry_name); - dentry = debugfs_rename(rootdir, opp_dev->dentry, rootdir, - opp_table->dentry_name); - if (IS_ERR(dentry)) { + err = debugfs_change_name(opp_dev->dentry, "%s", opp_table->dentry_name); + if (err) { dev_err(dev, "%s: Failed to rename link from: %s to %s\n", __func__, dev_name(opp_dev->dev), dev_name(dev)); return; } - new_dev->dentry = dentry; - opp_table->dentry = dentry; + new_dev->dentry = opp_table->dentry = opp_dev->dentry; } /** diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index d1d97a4bb36d..3431a7df3e0d 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -419,19 +419,12 @@ static void pcim_intx_restore(struct device *dev, void *data) pci_intx(pdev, res->orig_intx); } -static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev) +static void save_orig_intx(struct pci_dev *pdev, struct pcim_intx_devres *res) { - struct pcim_intx_devres *res; - - res = devres_find(dev, pcim_intx_restore, NULL, NULL); - if (res) - return res; + u16 pci_command; - res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL); - if (res) - devres_add(dev, res); - - return res; + pci_read_config_word(pdev, PCI_COMMAND, &pci_command); + res->orig_intx = !(pci_command & PCI_COMMAND_INTX_DISABLE); } /** @@ -447,12 +440,23 @@ static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev) int pcim_intx(struct pci_dev *pdev, int enable) { struct pcim_intx_devres *res; + struct device *dev = &pdev->dev; - res = get_or_create_intx_devres(&pdev->dev); - if (!res) - return -ENOMEM; + /* + * pcim_intx() must only restore the INTx value that existed before the + * driver was loaded, i.e., before it called pcim_intx() for the + * first time. 
+ */ + res = devres_find(dev, pcim_intx_restore, NULL, NULL); + if (!res) { + res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + save_orig_intx(pdev, res); + devres_add(dev, res); + } - res->orig_intx = !enable; pci_intx(pdev, enable); return 0; diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index b5cc11abc962..0e360feb3432 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1279,7 +1279,7 @@ static int armv8pmu_proc_user_access_handler(const struct ctl_table *table, int return 0; } -static struct ctl_table armv8_pmu_sysctl_table[] = { +static const struct ctl_table armv8_pmu_sysctl_table[] = { { .procname = "perf_user_access", .data = &sysctl_perf_user_access, diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 194c153e5d71..698de8ddf895 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -1317,7 +1317,7 @@ static int riscv_pmu_proc_user_access_handler(const struct ctl_table *table, return 0; } -static struct ctl_table sbi_pmu_sysctl_table[] = { +static const struct ctl_table sbi_pmu_sysctl_table[] = { { .procname = "perf_user_access", .data = &sysctl_perf_user_access, diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c index cd159a71b23c..29b8fd4b9351 100644 --- a/drivers/phy/allwinner/phy-sun4i-usb.c +++ b/drivers/phy/allwinner/phy-sun4i-usb.c @@ -23,7 +23,6 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/of.h> -#include <linux/of_gpio.h> #include <linux/phy/phy.h> #include <linux/phy/phy-sun4i-usb.h> #include <linux/platform_device.h> diff --git a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c index d3ccf547ba1c..45004f598e4d 100644 --- a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c +++ b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c @@ -331,25 +331,17 @@ fsl_samsung_hdmi_phy_configure_pll_lock_det(struct fsl_samsung_hdmi_phy *phy, { u32 pclk = cfg->pixclk; u32 fld_tg_code; - u32 pclk_khz; - u8 div = 1; - - switch (cfg->pixclk) { - case 22250000 ... 47500000: - div = 1; - break; - case 50349650 ... 99000000: - div = 2; - break; - case 100699300 ... 198000000: - div = 4; - break; - case 205000000 ... 297000000: - div = 8; - break; + u32 int_pllclk; + u8 div; + + /* Find int_pllclk speed */ + for (div = 0; div < 4; div++) { + int_pllclk = pclk / (1 << div); + if (int_pllclk < (50 * MHZ)) + break; } - writeb(FIELD_PREP(REG12_CK_DIV_MASK, ilog2(div)), phy->regs + PHY_REG(12)); + writeb(FIELD_PREP(REG12_CK_DIV_MASK, div), phy->regs + PHY_REG(12)); /* * Calculation for the frequency lock detector target code (fld_tg_code) @@ -362,10 +354,8 @@ fsl_samsung_hdmi_phy_configure_pll_lock_det(struct fsl_samsung_hdmi_phy *phy, * settings rounding up always too. TODO: Check if that is * correct. */ - pclk /= div; - pclk_khz = pclk / 1000; - fld_tg_code = 256 * 1000 * 1000 / pclk_khz * 24; - fld_tg_code = DIV_ROUND_UP(fld_tg_code, 1000); + + fld_tg_code = DIV_ROUND_UP(24 * MHZ * 256, int_pllclk); /* FLD_TOL and FLD_RP_CODE taken from downstream driver */ writeb(FIELD_PREP(REG13_TG_CODE_LOW_MASK, fld_tg_code), @@ -406,16 +396,15 @@ static unsigned long fsl_samsung_hdmi_phy_find_pms(unsigned long fout, u8 *p, u1 continue; /* - * TODO: Ref Manual doesn't state the range of _m - * so this should be further refined if possible. 
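The pcim_intx() rework just above reduces to a devres "find or allocate exactly once" idiom: the original INTx state is captured only on the first call, so repeated calls can no longer overwrite the value restored at driver detach. A generic sketch of that idiom with hypothetical names (my_restore, struct my_state, my_capture):

/* Return this device's saved state, creating and filling it only the
 * first time it is requested; my_restore() is the dr_release_t callback
 * devres will invoke on driver detach.
 */
static struct my_state *my_state_get(struct device *dev)
{
	struct my_state *res;

	res = devres_find(dev, my_restore, NULL, NULL);
	if (res)
		return res;

	res = devres_alloc(my_restore, sizeof(*res), GFP_KERNEL);
	if (!res)
		return NULL;

	my_capture(dev, res);		/* snapshot the pristine state once */
	devres_add(dev, res);
	return res;
}
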
- * This range was set based on the original values - * in the lookup table + * The Ref manual doesn't explicitly state the range of M, + * but it does show it as an 8-bit value, so reject + * any value above 255. */ tmp = (u64)fout * (_p * _s); do_div(tmp, 24 * MHZ); - _m = tmp; - if (_m < 0x30 || _m > 0x7b) + if (tmp > 255) continue; + _m = tmp; /* * Rev 2 of the Ref Manual states the @@ -440,9 +429,13 @@ static unsigned long fsl_samsung_hdmi_phy_find_pms(unsigned long fout, u8 *p, u1 min_delta = delta; best_freq = tmp; } + + /* If we have an exact match, stop looking for a better value */ + if (!delta) + goto done; } } - +done: if (best_freq) { *p = best_p; *m = best_m; diff --git a/drivers/phy/hisilicon/phy-hi3670-pcie.c b/drivers/phy/hisilicon/phy-hi3670-pcie.c index 0ac9634b398d..dbc7dcce682b 100644 --- a/drivers/phy/hisilicon/phy-hi3670-pcie.c +++ b/drivers/phy/hisilicon/phy-hi3670-pcie.c @@ -16,15 +16,20 @@ */ #include <linux/bitfield.h> +#include <linux/bits.h> #include <linux/clk.h> -#include <linux/gpio.h> -#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> #include <linux/mfd/syscon.h> +#include <linux/mod_devicetable.h> #include <linux/module.h> -#include <linux/of_gpio.h> +#include <linux/of.h> #include <linux/phy/phy.h> #include <linux/platform_device.h> #include <linux/regmap.h> +#include <linux/types.h> #define AXI_CLK_FREQ 207500000 #define REF_CLK_FREQ 100000000 diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c index fefc02d921e6..71f9c14fb50d 100644 --- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c @@ -422,7 +422,7 @@ static int mvebu_comphy_ethernet_init_reset(struct mvebu_comphy_lane *lane) /* wait until clocks are ready */ mdelay(1); - /* exlicitly disable 40B, the bits isn't clear on reset */ + /* explicitly disable 40B, the bits isn't clear on reset */ regmap_read(priv->regmap, MVEBU_COMPHY_CONF6(lane->id), &val); val &= ~MVEBU_COMPHY_CONF6_40B; regmap_write(priv->regmap, MVEBU_COMPHY_CONF6(lane->id), val); diff --git a/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c b/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c index bbfe11d6a69d..b38f3ae26b3f 100644 --- a/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c +++ b/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c @@ -9,6 +9,8 @@ #include <linux/module.h> #include <linux/phy/phy.h> #include <linux/platform_device.h> +#include <linux/regulator/driver.h> +#include <linux/regulator/of_regulator.h> #include <linux/types.h> #include <linux/units.h> #include <linux/nvmem-consumer.h> @@ -478,8 +480,50 @@ static int mtk_hdmi_phy_configure(struct phy *phy, union phy_configure_opts *opt return ret; } +static int mtk_hdmi_phy_pwr5v_enable(struct regulator_dev *rdev) +{ + struct mtk_hdmi_phy *hdmi_phy = rdev_get_drvdata(rdev); + + mtk_phy_set_bits(hdmi_phy->regs + HDMI_CTL_1, RG_HDMITX_PWR5V_O); + + return 0; +} + +static int mtk_hdmi_phy_pwr5v_disable(struct regulator_dev *rdev) +{ + struct mtk_hdmi_phy *hdmi_phy = rdev_get_drvdata(rdev); + + mtk_phy_clear_bits(hdmi_phy->regs + HDMI_CTL_1, RG_HDMITX_PWR5V_O); + + return 0; +} + +static int mtk_hdmi_phy_pwr5v_is_enabled(struct regulator_dev *rdev) +{ + struct mtk_hdmi_phy *hdmi_phy = rdev_get_drvdata(rdev); + + return !!(readl(hdmi_phy->regs + HDMI_CTL_1) & RG_HDMITX_PWR5V_O); +} + +static const struct regulator_ops mtk_hdmi_pwr5v_regulator_ops = { + .enable = mtk_hdmi_phy_pwr5v_enable, + .disable = 
mtk_hdmi_phy_pwr5v_disable, + .is_enabled = mtk_hdmi_phy_pwr5v_is_enabled +}; + +static const struct regulator_desc mtk_hdmi_phy_pwr5v_desc = { + .name = "hdmi-pwr5v", + .id = -1, + .n_voltages = 1, + .fixed_uV = 5000000, + .ops = &mtk_hdmi_pwr5v_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, +}; + struct mtk_hdmi_phy_conf mtk_hdmi_phy_8195_conf = { .flags = CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, + .hdmi_phy_regulator_desc = &mtk_hdmi_phy_pwr5v_desc, .hdmi_phy_clk_ops = &mtk_hdmi_pll_ops, .hdmi_phy_enable_tmds = mtk_hdmi_phy_enable_tmds, .hdmi_phy_disable_tmds = mtk_hdmi_phy_disable_tmds, diff --git a/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.h b/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.h index 22a68dc9550c..e26caaf4d104 100644 --- a/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.h +++ b/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.h @@ -103,6 +103,9 @@ #define HDMI_ANA_CTL 0x7c #define REG_ANA_HDMI20_FIFO_EN BIT(16) +#define HDMI_CTL_1 0xc4 +#define RG_HDMITX_PWR5V_O BIT(9) + #define HDMI_CTL_3 0xcc #define REG_HDMITXPLL_DIV GENMASK(4, 0) #define REG_HDMITX_REF_XTAL_SEL BIT(7) diff --git a/drivers/phy/mediatek/phy-mtk-hdmi.c b/drivers/phy/mediatek/phy-mtk-hdmi.c index d2e824771f9d..52a7d525ff9b 100644 --- a/drivers/phy/mediatek/phy-mtk-hdmi.c +++ b/drivers/phy/mediatek/phy-mtk-hdmi.c @@ -75,6 +75,28 @@ static void mtk_hdmi_phy_clk_get_data(struct mtk_hdmi_phy *hdmi_phy, clk_init->ops = hdmi_phy->conf->hdmi_phy_clk_ops; } +static int mtk_hdmi_phy_register_regulators(struct mtk_hdmi_phy *hdmi_phy) +{ + const struct regulator_desc *vreg_desc = hdmi_phy->conf->hdmi_phy_regulator_desc; + const struct regulator_init_data vreg_init_data = { + .constraints = { + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + } + }; + struct regulator_config vreg_config = { + .dev = hdmi_phy->dev, + .driver_data = hdmi_phy, + .init_data = &vreg_init_data, + .of_node = hdmi_phy->dev->of_node + }; + + hdmi_phy->rdev = devm_regulator_register(hdmi_phy->dev, vreg_desc, &vreg_config); + if (IS_ERR(hdmi_phy->rdev)) + return PTR_ERR(hdmi_phy->rdev); + + return 0; +} + static int mtk_hdmi_phy_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -150,6 +172,12 @@ static int mtk_hdmi_phy_probe(struct platform_device *pdev) if (hdmi_phy->conf->pll_default_off) hdmi_phy->conf->hdmi_phy_disable_tmds(hdmi_phy); + if (hdmi_phy->conf->hdmi_phy_regulator_desc) { + ret = mtk_hdmi_phy_register_regulators(hdmi_phy); + if (ret) + return ret; + } + return of_clk_add_provider(dev->of_node, of_clk_src_simple_get, hdmi_phy->pll); } diff --git a/drivers/phy/mediatek/phy-mtk-hdmi.h b/drivers/phy/mediatek/phy-mtk-hdmi.h index 71c02d043485..99d917e0036a 100644 --- a/drivers/phy/mediatek/phy-mtk-hdmi.h +++ b/drivers/phy/mediatek/phy-mtk-hdmi.h @@ -13,6 +13,8 @@ #include <linux/module.h> #include <linux/phy/phy.h> #include <linux/platform_device.h> +#include <linux/regulator/driver.h> +#include <linux/regulator/machine.h> #include <linux/types.h> struct mtk_hdmi_phy; @@ -20,6 +22,7 @@ struct mtk_hdmi_phy; struct mtk_hdmi_phy_conf { unsigned long flags; bool pll_default_off; + const struct regulator_desc *hdmi_phy_regulator_desc; const struct clk_ops *hdmi_phy_clk_ops; void (*hdmi_phy_enable_tmds)(struct mtk_hdmi_phy *hdmi_phy); void (*hdmi_phy_disable_tmds)(struct mtk_hdmi_phy *hdmi_phy); @@ -32,6 +35,7 @@ struct mtk_hdmi_phy { struct mtk_hdmi_phy_conf *conf; struct clk *pll; struct clk_hw pll_hw; + struct regulator_dev *rdev; unsigned long pll_rate; unsigned char drv_imp_clk; unsigned char drv_imp_d2; diff --git 
a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index 3f7095ec5978..a496fbe3352b 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -381,17 +381,12 @@ static const char *const u3_phy_files[] = { static int u2_phy_params_show(struct seq_file *sf, void *unused) { struct mtk_phy_instance *inst = sf->private; - const char *fname = file_dentry(sf->file)->d_iname; struct u2phy_banks *u2_banks = &inst->u2_banks; void __iomem *com = u2_banks->com; u32 max = 0; u32 tmp = 0; u32 val = 0; - int ret; - - ret = match_string(u2_phy_files, ARRAY_SIZE(u2_phy_files), fname); - if (ret < 0) - return ret; + int ret = debugfs_get_aux_num(sf->file); switch (ret) { case U2P_EYE_VRT: @@ -438,7 +433,7 @@ static int u2_phy_params_show(struct seq_file *sf, void *unused) break; } - seq_printf(sf, "%s : %d [0, %d]\n", fname, val, max); + seq_printf(sf, "%s : %d [0, %d]\n", u2_phy_files[ret], val, max); return 0; } @@ -451,23 +446,18 @@ static int u2_phy_params_open(struct inode *inode, struct file *file) static ssize_t u2_phy_params_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) { - const char *fname = file_dentry(file)->d_iname; struct seq_file *sf = file->private_data; struct mtk_phy_instance *inst = sf->private; struct u2phy_banks *u2_banks = &inst->u2_banks; void __iomem *com = u2_banks->com; ssize_t rc; u32 val; - int ret; + int ret = debugfs_get_aux_num(file); rc = kstrtouint_from_user(ubuf, USER_BUF_LEN(count), 0, &val); if (rc) return rc; - ret = match_string(u2_phy_files, ARRAY_SIZE(u2_phy_files), fname); - if (ret < 0) - return (ssize_t)ret; - switch (ret) { case U2P_EYE_VRT: mtk_phy_update_field(com + U3P_USBPHYACR1, PA1_RG_VRT_SEL, val); @@ -516,23 +506,18 @@ static void u2_phy_dbgfs_files_create(struct mtk_phy_instance *inst) int i; for (i = 0; i < count; i++) - debugfs_create_file(u2_phy_files[i], 0644, inst->phy->debugfs, - inst, &u2_phy_fops); + debugfs_create_file_aux_num(u2_phy_files[i], 0644, inst->phy->debugfs, + inst, i, &u2_phy_fops); } static int u3_phy_params_show(struct seq_file *sf, void *unused) { struct mtk_phy_instance *inst = sf->private; - const char *fname = file_dentry(sf->file)->d_iname; struct u3phy_banks *u3_banks = &inst->u3_banks; u32 val = 0; u32 max = 0; u32 tmp; - int ret; - - ret = match_string(u3_phy_files, ARRAY_SIZE(u3_phy_files), fname); - if (ret < 0) - return ret; + int ret = debugfs_get_aux_num(sf->file); switch (ret) { case U3P_EFUSE_EN: @@ -564,7 +549,7 @@ static int u3_phy_params_show(struct seq_file *sf, void *unused) break; } - seq_printf(sf, "%s : %d [0, %d]\n", fname, val, max); + seq_printf(sf, "%s : %d [0, %d]\n", u3_phy_files[ret], val, max); return 0; } @@ -577,23 +562,18 @@ static int u3_phy_params_open(struct inode *inode, struct file *file) static ssize_t u3_phy_params_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) { - const char *fname = file_dentry(file)->d_iname; struct seq_file *sf = file->private_data; struct mtk_phy_instance *inst = sf->private; struct u3phy_banks *u3_banks = &inst->u3_banks; void __iomem *phyd = u3_banks->phyd; ssize_t rc; u32 val; - int ret; + int ret = debugfs_get_aux_num(sf->file); rc = kstrtouint_from_user(ubuf, USER_BUF_LEN(count), 0, &val); if (rc) return rc; - ret = match_string(u3_phy_files, ARRAY_SIZE(u3_phy_files), fname); - if (ret < 0) - return (ssize_t)ret; - switch (ret) { case U3P_EFUSE_EN: mtk_phy_update_field(phyd + U3P_U3_PHYD_RSV, @@ -636,8 +616,8 @@ static void 
u3_phy_dbgfs_files_create(struct mtk_phy_instance *inst) int i; for (i = 0; i < count; i++) - debugfs_create_file(u3_phy_files[i], 0644, inst->phy->debugfs, - inst, &u3_phy_fops); + debugfs_create_file_aux_num(u3_phy_files[i], 0644, inst->phy->debugfs, + inst, i, &u3_phy_fops); } static int phy_type_show(struct seq_file *sf, void *unused) diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 413f76e2d174..8dfdce605a90 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -749,8 +749,8 @@ EXPORT_SYMBOL_GPL(devm_phy_put); /** * of_phy_simple_xlate() - returns the phy instance from phy provider - * @dev: the PHY provider device - * @args: of_phandle_args (not used here) + * @dev: the PHY provider device (not used here) + * @args: of_phandle_args * * Intended to be used by phy provider for the common case where #phy-cells is * 0. For other cases where #phy-cells is greater than '0', the phy provider @@ -760,21 +760,14 @@ EXPORT_SYMBOL_GPL(devm_phy_put); struct phy *of_phy_simple_xlate(struct device *dev, const struct of_phandle_args *args) { - struct phy *phy; - struct class_dev_iter iter; - - class_dev_iter_init(&iter, &phy_class, NULL, NULL); - while ((dev = class_dev_iter_next(&iter))) { - phy = to_phy(dev); - if (args->np != phy->dev.of_node) - continue; + struct device *target_dev; - class_dev_iter_exit(&iter); - return phy; - } + target_dev = class_find_device_by_of_node(&phy_class, args->np); + if (!target_dev) + return ERR_PTR(-ENODEV); - class_dev_iter_exit(&iter); - return ERR_PTR(-ENODEV); + put_device(target_dev); + return to_phy(target_dev); } EXPORT_SYMBOL_GPL(of_phy_simple_xlate); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 3bae39381fd0..b09fa00e9fe7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -400,6 +400,57 @@ static const struct qmp_phy_init_tbl qmp_v3_usb3_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_RUN_TIME, 0x13), }; +static const struct qmp_phy_init_tbl sar2130p_usb3_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE1, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE1, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE1, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORECLK_DIV_MODE1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE1, 0x2e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE1, 0x82), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MSB_MODE1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE1, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE1, 0xd5), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE1, 0x05), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE1_MODE1, 0x25), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE2_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0xb7), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x1e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xb7), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x02), + 
QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x12), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MSB_MODE0, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE0, 0xd5), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x05), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE1_MODE0, 0x25), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE2_MODE0, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BG_TIMER, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x31), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_BUF_ENABLE, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_CFG, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_AUTO_GAIN_ADJ_CTRL_1, 0xb6), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_AUTO_GAIN_ADJ_CTRL_2, 0x4b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_AUTO_GAIN_ADJ_CTRL_3, 0x37), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_ADDITIONAL_MISC, 0x0c), +}; + static const struct qmp_phy_init_tbl sm6350_usb3_rx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_FO_GAIN, 0x0b), QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f), @@ -1730,6 +1781,51 @@ static const struct qmp_combo_offsets qmp_combo_offsets_v5 = { .dp_dp_phy = 0x2200, }; +static const struct qmp_phy_cfg sar2130p_usb3dpphy_cfg = { + .offsets = &qmp_combo_offsets_v3, + + .serdes_tbl = sar2130p_usb3_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(sar2130p_usb3_serdes_tbl), + .tx_tbl = sm8550_usb3_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(sm8550_usb3_tx_tbl), + .rx_tbl = sm8550_usb3_rx_tbl, + .rx_tbl_num = ARRAY_SIZE(sm8550_usb3_rx_tbl), + .pcs_tbl = sm8550_usb3_pcs_tbl, + .pcs_tbl_num = ARRAY_SIZE(sm8550_usb3_pcs_tbl), + .pcs_usb_tbl = sm8550_usb3_pcs_usb_tbl, + .pcs_usb_tbl_num = ARRAY_SIZE(sm8550_usb3_pcs_usb_tbl), + + .dp_serdes_tbl = qmp_v6_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl), + .dp_tx_tbl = qmp_v6_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v6_dp_tx_tbl), + + .serdes_tbl_rbr = qmp_v6_dp_serdes_tbl_rbr, + .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_rbr), + .serdes_tbl_hbr = qmp_v6_dp_serdes_tbl_hbr, + .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr), + .serdes_tbl_hbr2 = qmp_v6_dp_serdes_tbl_hbr2, + .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr2), + .serdes_tbl_hbr3 = qmp_v6_dp_serdes_tbl_hbr3, + .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr3), + + .swing_hbr_rbr = &qmp_dp_v5_voltage_swing_hbr_rbr, + .pre_emphasis_hbr_rbr = &qmp_dp_v6_pre_emphasis_hbr_rbr, + .swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2, + .pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2, + + .dp_aux_init = qmp_v4_dp_aux_init, + .configure_dp_tx = qmp_v4_configure_dp_tx, + .configure_dp_phy = qmp_v4_configure_dp_phy, + .calibrate_dp_phy = qmp_v4_calibrate_dp_phy, + + .regs = qmp_v6_usb3phy_regs_layout, + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), +}; + static const 
struct qmp_phy_cfg sc7180_usb3dpphy_cfg = { .offsets = &qmp_combo_offsets_v3, @@ -3768,6 +3864,10 @@ err_node_put: static const struct of_device_id qmp_combo_of_match_table[] = { { + .compatible = "qcom,sar2130p-qmp-usb3-dp-phy", + .data = &sar2130p_usb3dpphy_cfg, + }, + { .compatible = "qcom,sc7180-qmp-usb3-dp-phy", .data = &sc7180_usb3dpphy_cfg, }, diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 873f2f9844c6..018bbb300830 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -728,6 +728,83 @@ static const struct qmp_phy_init_tbl ipq9574_gen3x2_pcie_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), }; +static const struct qmp_phy_init_tbl qcs615_pcie_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CLKBUFLR_EN, 0x18), + QMP_PHY_INIT_CFG(QSERDES_COM_CLK_ENABLE1, 0x10), + QMP_PHY_INIT_CFG(QSERDES_COM_BG_TRIM, 0xf), + QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP_EN, 0x1), + QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_MAP, 0x0), + QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_TIMER1, 0xff), + QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_TIMER2, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_COM_CMN_CONFIG, 0x6), + QMP_PHY_INIT_CFG(QSERDES_COM_PLL_IVCO, 0xf), + QMP_PHY_INIT_CFG(QSERDES_COM_HSCLK_SEL, 0x0), + QMP_PHY_INIT_CFG(QSERDES_COM_SVS_MODE_CLK_SEL, 0x1), + QMP_PHY_INIT_CFG(QSERDES_COM_CORE_CLK_EN, 0x20), + QMP_PHY_INIT_CFG(QSERDES_COM_CORECLK_DIV, 0xa), + QMP_PHY_INIT_CFG(QSERDES_COM_RESETSM_CNTRL, 0x20), + QMP_PHY_INIT_CFG(QSERDES_COM_BG_TIMER, 0x9), + QMP_PHY_INIT_CFG(QSERDES_COM_SYSCLK_EN_SEL, 0x4), + QMP_PHY_INIT_CFG(QSERDES_COM_DEC_START_MODE0, 0x82), + QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START3_MODE0, 0x3), + QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START2_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START1_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP3_MODE0, 0x0), + QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP2_MODE0, 0xd), + QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP1_MODE0, 0x04), + QMP_PHY_INIT_CFG(QSERDES_COM_CLK_SELECT, 0x35), + QMP_PHY_INIT_CFG(QSERDES_COM_SYS_CLK_CTRL, 0x2), + QMP_PHY_INIT_CFG(QSERDES_COM_SYSCLK_BUF_ENABLE, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_COM_CP_CTRL_MODE0, 0x4), + QMP_PHY_INIT_CFG(QSERDES_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_COM_PLL_CCTRL_MODE0, 0x30), + QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN1_MODE0, 0x0), + QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN0_MODE0, 0x80), + QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CTRL_BY_PSM, 0x1), + QMP_PHY_INIT_CFG(QSERDES_COM_BG_TIMER, 0xa), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_EN_CENTER, 0x1), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_PER1, 0x31), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_PER2, 0x1), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_ADJ_PER1, 0x2), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_ADJ_PER2, 0x0), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_STEP_SIZE1, 0x2f), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_STEP_SIZE2, 0x19), + QMP_PHY_INIT_CFG(QSERDES_COM_CLK_EP_DIV, 0x19), +}; + +static const struct qmp_phy_init_tbl qcs615_pcie_rx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_RX_SIGDET_ENABLES, 0x1c), + QMP_PHY_INIT_CFG(QSERDES_RX_SIGDET_DEGLITCH_CNTRL, 0x14), + QMP_PHY_INIT_CFG(QSERDES_RX_RX_EQU_ADAPTOR_CNTRL2, 0x1), + QMP_PHY_INIT_CFG(QSERDES_RX_RX_EQU_ADAPTOR_CNTRL3, 0x0), + QMP_PHY_INIT_CFG(QSERDES_RX_RX_EQU_ADAPTOR_CNTRL4, 0xdb), + QMP_PHY_INIT_CFG(QSERDES_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x4b), + QMP_PHY_INIT_CFG(QSERDES_RX_UCDR_SO_GAIN, 0x4), + QMP_PHY_INIT_CFG(QSERDES_RX_UCDR_SO_GAIN_HALF, 0x4), +}; + +static const struct 
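A note for readers skimming the long register tables added here and below: each QMP_PHY_INIT_CFG(reg, val) entry is simply an offset/value pair that the driver later programs against the corresponding block's base address (qmp_configure() in the PCIe driver, with QMP_PHY_INIT_CFG_LANE() additionally carrying a lane mask). A simplified, hypothetical restatement of that mechanism:

/* Illustrative only; the real struct qmp_phy_init_tbl and its configure
 * helpers also handle per-lane entries.
 */
struct my_init_entry {
	unsigned int offset;
	unsigned int val;
};

static void my_configure_block(void __iomem *base,
			       const struct my_init_entry *tbl, int num)
{
	int i;

	for (i = 0; i < num; i++)
		writel(tbl[i].val, base + tbl[i].offset);
}
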
qmp_phy_init_tbl qcs615_pcie_tx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_TX_HIGHZ_TRANSCEIVEREN_BIAS_DRVR_EN, 0x45), + QMP_PHY_INIT_CFG(QSERDES_TX_LANE_MODE, 0x6), + QMP_PHY_INIT_CFG(QSERDES_TX_RES_CODE_LANE_OFFSET, 0x2), + QMP_PHY_INIT_CFG(QSERDES_TX_RCV_DETECT_LVL_2, 0x12), +}; + +static const struct qmp_phy_init_tbl qcs615_pcie_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V2_PCS_ENDPOINT_REFCLK_DRIVE, 0x4), + QMP_PHY_INIT_CFG(QPHY_V2_PCS_OSC_DTCT_ACTIONS, 0x0), + QMP_PHY_INIT_CFG(QPHY_V2_PCS_PWRUP_RESET_DLY_TIME_AUXCLK, 0x40), + QMP_PHY_INIT_CFG(QPHY_V2_PCS_L1SS_WAKEUP_DLY_TIME_AUXCLK_MSB, 0x0), + QMP_PHY_INIT_CFG(QPHY_V2_PCS_L1SS_WAKEUP_DLY_TIME_AUXCLK_LSB, 0x40), + QMP_PHY_INIT_CFG(QPHY_V2_PCS_PLL_LOCK_CHK_DLY_TIME_AUXCLK_LSB, 0x0), + QMP_PHY_INIT_CFG(QPHY_V2_PCS_LP_WAKEUP_DLY_TIME_AUXCLK, 0x40), + QMP_PHY_INIT_CFG(QPHY_V2_PCS_PLL_LOCK_CHK_DLY_TIME, 0x73), + QMP_PHY_INIT_CFG(QPHY_V2_PCS_SIGDET_CNTRL, 0x7), + QMP_PHY_INIT_CFG(QPHY_V2_PCS_RX_SIGDET_LVL, 0x99), + QMP_PHY_INIT_CFG(QPHY_V2_PCS_TXDEEMPH_M6DB_V0, 0x15), + QMP_PHY_INIT_CFG(QPHY_V2_PCS_TXDEEMPH_M3P5DB_V0, 0xe), +}; + static const struct qmp_phy_init_tbl sdm845_qmp_pcie_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN, 0x14), QMP_PHY_INIT_CFG(QSERDES_V3_COM_CLK_SELECT, 0x30), @@ -1773,7 +1850,7 @@ static const struct qmp_phy_init_tbl sdx55_qmp_pcie_rc_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V4_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), }; -static const struct qmp_phy_init_tbl sdx55_qmp_pcie_ep_pcs_misc_tbl[] = { +static const struct qmp_phy_init_tbl sdx55_qmp_pcie_ep_pcs_lane1_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V4_20_PCS_LANE1_INSIG_SW_CTRL2, 0x00), QMP_PHY_INIT_CFG(QPHY_V4_20_PCS_LANE1_INSIG_MX_CTRL2, 0x00), }; @@ -1907,6 +1984,9 @@ static const struct qmp_phy_init_tbl sdx65_qmp_pcie_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG2, 0x0d), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5, 0x02), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN, 0x2e), +}; + +static const struct qmp_phy_init_tbl sdx65_qmp_pcie_pcs_lane1_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_LANE1_INSIG_SW_CTRL2, 0x00), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_LANE1_INSIG_MX_CTRL2, 0x00), }; @@ -2582,8 +2662,6 @@ static const struct qmp_phy_init_tbl sa8775p_qmp_gen4_pcie_rc_pcs_misc_tbl[] = { static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x2_pcie_pcs_alt_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG4, 0x16), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG5, 0x22), - QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_LANE1_INSIG_SW_CTRL2, 0x00), - QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_LANE1_INSIG_MX_CTRL2, 0x00), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_G3S2_PRE_GAIN, 0x2e), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_RX_SIGDET_LVL, 0x66), }; @@ -2724,10 +2802,106 @@ static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x2_pcie_ep_pcs_alt_tbl[] = QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_INSIG_SW_CTRL7, 0x00), }; +static const struct qmp_phy_init_tbl sar2130p_qmp_gen3x2_pcie_rc_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x31), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE1, 0x4c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE1, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CLK_ENABLE1, 0x90), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYS_CLK_CTRL, 0x82), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x07), + 
QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE1, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BG_TIMER, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x42), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE1, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x82), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE1, 0x68), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE0, 0xab), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE0, 0xea), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE1, 0xab), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE1, 0xaa), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CLK_SELECT, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORECLK_DIV_MODE1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_ADDITIONAL_MISC_3, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0xa0), +}; + +static const struct qmp_phy_init_tbl sar2130p_qmp_gen3x2_pcie_pcs_lane1_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_PCIE_V6_PCS_LANE1_INSIG_SW_CTRL2, 0x01), + QMP_PHY_INIT_CFG(QPHY_PCIE_V6_PCS_LANE1_INSIG_MX_CTRL2, 0x01), +}; + +static const struct qmp_phy_init_tbl sar2130p_qmp_gen3x2_pcie_rc_tx_tbl[] = { + QMP_PHY_INIT_CFG_LANE(QSERDES_V6_TX_BIST_MODE_LANENO, 0x00, 2), +}; + +static const struct qmp_phy_init_tbl sar2130p_qmp_gen3x2_pcie_rc_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V6_PCS_G12S1_TXDEEMPH_M6DB, 0x17), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_G3S2_PRE_GAIN, 0x2e), +}; + +static const struct qmp_phy_init_tbl sar2130p_qmp_gen3x2_pcie_ep_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BG_TIMER, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYS_CLK_CTRL, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x28), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE1, 0x28), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x0d), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE1, 0x0d), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x42), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE1, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE1, 0x09), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x19), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN0_MODE0, 0xfb), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN1_MODE0, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN0_MODE1, 0xfb), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN1_MODE1, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x14), + 
QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORECLK_DIV_MODE1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_MODE, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0xa0), +}; + +static const struct qmp_phy_init_tbl sar2130p_qmp_gen3x2_pcie_ep_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V6_PCS_G12S1_TXDEEMPH_M6DB, 0x17), +}; + +static const struct qmp_phy_init_tbl sar2130p_qmp_gen3x2_pcie_ep_pcs_misc_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_PCIE_V6_PCS_PCIE_EQ_CONFIG1, 0x1e), + QMP_PHY_INIT_CFG(QPHY_PCIE_V6_PCS_PCIE_POWER_STATE_CONFIG2, 0x14), + QMP_PHY_INIT_CFG(QPHY_PCIE_V6_PCS_PCIE_POWER_STATE_CONFIG4, 0x07), +}; + struct qmp_pcie_offsets { u16 serdes; u16 pcs; u16 pcs_misc; + u16 pcs_lane1; u16 tx; u16 rx; u16 tx2; @@ -2752,6 +2926,8 @@ struct qmp_phy_cfg_tbls { int pcs_num; const struct qmp_phy_init_tbl *pcs_misc; int pcs_misc_num; + const struct qmp_phy_init_tbl *pcs_lane1; + int pcs_lane1_num; const struct qmp_phy_init_tbl *ln_shrd; int ln_shrd_num; }; @@ -2811,6 +2987,7 @@ struct qmp_pcie { void __iomem *serdes; void __iomem *pcs; void __iomem *pcs_misc; + void __iomem *pcs_lane1; void __iomem *tx; void __iomem *rx; void __iomem *tx2; @@ -2927,6 +3104,7 @@ static const struct qmp_pcie_offsets qmp_pcie_offsets_v4_20 = { .serdes = 0x1000, .pcs = 0x1200, .pcs_misc = 0x1600, + .pcs_lane1 = 0x1e00, .tx = 0x0000, .rx = 0x0200, .tx2 = 0x0800, @@ -2957,6 +3135,7 @@ static const struct qmp_pcie_offsets qmp_pcie_offsets_v5_20 = { .serdes = 0x1000, .pcs = 0x1200, .pcs_misc = 0x1400, + .pcs_lane1 = 0x1e00, .tx = 0x0000, .rx = 0x0200, .tx2 = 0x0800, @@ -3132,6 +3311,31 @@ static const struct qmp_phy_cfg ipq9574_gen3x2_pciephy_cfg = { .pipe_clock_rate = 250000000, }; +static const struct qmp_phy_cfg qcs615_pciephy_cfg = { + .lanes = 1, + + .offsets = &qmp_pcie_offsets_v2, + + .tbls = { + .serdes = qcs615_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(qcs615_pcie_serdes_tbl), + .tx = qcs615_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(qcs615_pcie_tx_tbl), + .rx = qcs615_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(qcs615_pcie_rx_tbl), + .pcs = qcs615_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(qcs615_pcie_pcs_tbl), + }, + .reset_list = sdm845_pciephy_reset_l, + .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = pciephy_v2_regs_layout, + + .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, +}; + static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .lanes = 1, @@ -3283,6 +3487,49 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = { .skip_start_delay = true, }; +static const struct qmp_phy_cfg sar2130p_qmp_gen3x2_pciephy_cfg = { + .lanes = 2, + + .offsets = &qmp_pcie_offsets_v5, + + .tbls = { + .tx = sm8550_qmp_gen3x2_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8550_qmp_gen3x2_pcie_tx_tbl), + .rx = sm8550_qmp_gen3x2_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8550_qmp_gen3x2_pcie_rx_tbl), + .pcs = sm8550_qmp_gen3x2_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8550_qmp_gen3x2_pcie_pcs_tbl), + .pcs_lane1 = sar2130p_qmp_gen3x2_pcie_pcs_lane1_tbl, + .pcs_lane1_num = ARRAY_SIZE(sar2130p_qmp_gen3x2_pcie_pcs_lane1_tbl), + }, + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { + .serdes = sar2130p_qmp_gen3x2_pcie_rc_serdes_tbl, + .serdes_num = ARRAY_SIZE(sar2130p_qmp_gen3x2_pcie_rc_serdes_tbl), + .tx = sar2130p_qmp_gen3x2_pcie_rc_tx_tbl, + .tx_num = ARRAY_SIZE(sar2130p_qmp_gen3x2_pcie_rc_tx_tbl), + .pcs = sar2130p_qmp_gen3x2_pcie_rc_pcs_tbl, + .pcs_num = 
ARRAY_SIZE(sar2130p_qmp_gen3x2_pcie_rc_pcs_tbl), + .pcs_misc = sm8550_qmp_gen3x2_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8550_qmp_gen3x2_pcie_pcs_misc_tbl), + }, + .tbls_ep = &(const struct qmp_phy_cfg_tbls) { + .serdes = sar2130p_qmp_gen3x2_pcie_ep_serdes_tbl, + .serdes_num = ARRAY_SIZE(sar2130p_qmp_gen3x2_pcie_ep_serdes_tbl), + .pcs = sar2130p_qmp_gen3x2_pcie_ep_pcs_tbl, + .pcs_num = ARRAY_SIZE(sar2130p_qmp_gen3x2_pcie_ep_pcs_tbl), + .pcs_misc = sar2130p_qmp_gen3x2_pcie_ep_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sar2130p_qmp_gen3x2_pcie_ep_pcs_misc_tbl), + }, + .reset_list = sdm845_pciephy_reset_l, + .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = pciephy_v5_regs_layout, + + .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, +}; + static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .lanes = 2, @@ -3440,8 +3687,8 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .tbls_ep = &(const struct qmp_phy_cfg_tbls) { .serdes = sdx55_qmp_pcie_ep_serdes_tbl, .serdes_num = ARRAY_SIZE(sdx55_qmp_pcie_ep_serdes_tbl), - .pcs_misc = sdx55_qmp_pcie_ep_pcs_misc_tbl, - .pcs_misc_num = ARRAY_SIZE(sdx55_qmp_pcie_ep_pcs_misc_tbl), + .pcs_lane1 = sdx55_qmp_pcie_ep_pcs_lane1_tbl, + .pcs_lane1_num = ARRAY_SIZE(sdx55_qmp_pcie_ep_pcs_lane1_tbl), }, .reset_list = sdm845_pciephy_reset_l, @@ -3540,6 +3787,8 @@ static const struct qmp_phy_cfg sdx65_qmp_pciephy_cfg = { .pcs_num = ARRAY_SIZE(sdx65_qmp_pcie_pcs_tbl), .pcs_misc = sdx65_qmp_pcie_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sdx65_qmp_pcie_pcs_misc_tbl), + .pcs_lane1 = sdx65_qmp_pcie_pcs_lane1_tbl, + .pcs_lane1_num = ARRAY_SIZE(sdx65_qmp_pcie_pcs_lane1_tbl), }, .reset_list = sdm845_pciephy_reset_l, .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), @@ -3739,6 +3988,8 @@ static const struct qmp_phy_cfg sa8775p_qmp_gen4x2_pciephy_cfg = { .pcs_num = ARRAY_SIZE(sa8775p_qmp_gen4x2_pcie_pcs_alt_tbl), .pcs_misc = sa8775p_qmp_gen4_pcie_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_pcs_misc_tbl), + .pcs_lane1 = sdx65_qmp_pcie_pcs_lane1_tbl, + .pcs_lane1_num = ARRAY_SIZE(sdx65_qmp_pcie_pcs_lane1_tbl), }, .tbls_rc = &(const struct qmp_phy_cfg_tbls) { @@ -3945,6 +4196,7 @@ static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_c void __iomem *rx2 = qmp->rx2; void __iomem *pcs = qmp->pcs; void __iomem *pcs_misc = qmp->pcs_misc; + void __iomem *pcs_lane1 = qmp->pcs_lane1; void __iomem *ln_shrd = qmp->ln_shrd; if (!tbls) @@ -3969,6 +4221,7 @@ static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_c qmp_configure(qmp->dev, pcs, tbls->pcs, tbls->pcs_num); qmp_configure(qmp->dev, pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num); + qmp_configure(qmp->dev, pcs_lane1, tbls->pcs_lane1, tbls->pcs_lane1_num); if (cfg->lanes >= 4 && qmp->tcsr_4ln_config) { qmp_configure(qmp->dev, serdes, cfg->serdes_4ln_tbl, @@ -4420,6 +4673,14 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np } } + /* + * For all platforms where legacy bindings existed, PCS_LANE1 was + * mapped as a part of the PCS_MISC region. 
+ */ + if (!IS_ERR(qmp->pcs_misc) && cfg->offsets->pcs_lane1 != 0) + qmp->pcs_lane1 = qmp->pcs_misc + + (cfg->offsets->pcs_lane1 - cfg->offsets->pcs_misc); + clk = devm_get_clk_from_child(dev, np, NULL); if (IS_ERR(clk)) { return dev_err_probe(dev, PTR_ERR(clk), @@ -4487,6 +4748,7 @@ static int qmp_pcie_parse_dt(struct qmp_pcie *qmp) qmp->serdes = base + offs->serdes; qmp->pcs = base + offs->pcs; qmp->pcs_misc = base + offs->pcs_misc; + qmp->pcs_lane1 = base + offs->pcs_lane1; qmp->tx = base + offs->tx; qmp->rx = base + offs->rx; @@ -4612,12 +4874,18 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { .compatible = "qcom,msm8998-qmp-pcie-phy", .data = &msm8998_pciephy_cfg, }, { + .compatible = "qcom,qcs615-qmp-gen3x1-pcie-phy", + .data = &qcs615_pciephy_cfg, + }, { .compatible = "qcom,sa8775p-qmp-gen4x2-pcie-phy", .data = &sa8775p_qmp_gen4x2_pciephy_cfg, }, { .compatible = "qcom,sa8775p-qmp-gen4x4-pcie-phy", .data = &sa8775p_qmp_gen4x4_pciephy_cfg, }, { + .compatible = "qcom,sar2130p-qmp-gen3x2-pcie-phy", + .data = &sar2130p_qmp_gen3x2_pciephy_cfg, + }, { .compatible = "qcom,sc8180x-qmp-pcie-phy", .data = &sc8180x_pciephy_cfg, }, { diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v4_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v4_20.h index ac872a9eff9a..ab892d1067c2 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v4_20.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v4_20.h @@ -13,7 +13,8 @@ #define QPHY_V4_20_PCS_PCIE_G4_RXEQEVAL_TIME 0x0f4 #define QPHY_V4_20_PCS_PCIE_G4_EQ_CONFIG2 0x0fc #define QPHY_V4_20_PCS_PCIE_G4_EQ_CONFIG5 0x108 -#define QPHY_V4_20_PCS_LANE1_INSIG_SW_CTRL2 0x824 -#define QPHY_V4_20_PCS_LANE1_INSIG_MX_CTRL2 0x828 + +#define QPHY_V4_20_PCS_LANE1_INSIG_SW_CTRL2 0x024 +#define QPHY_V4_20_PCS_LANE1_INSIG_MX_CTRL2 0x028 #endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h index cdf8c04ea078..283d63c81593 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h @@ -17,7 +17,8 @@ #define QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5 0x108 #define QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN 0x15c #define QPHY_V5_20_PCS_PCIE_RX_MARGINING_CONFIG3 0x184 -#define QPHY_V5_20_PCS_LANE1_INSIG_SW_CTRL2 0xa24 -#define QPHY_V5_20_PCS_LANE1_INSIG_MX_CTRL2 0xa28 + +#define QPHY_V5_20_PCS_LANE1_INSIG_SW_CTRL2 0x024 +#define QPHY_V5_20_PCS_LANE1_INSIG_MX_CTRL2 0x028 #endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v6.h index 0ca79333d942..45397cb3c0c6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v6.h @@ -14,4 +14,7 @@ #define QPHY_PCIE_V6_PCS_PCIE_ENDPOINT_REFCLK_DRIVE 0x20 #define QPHY_PCIE_V6_PCS_PCIE_OSC_DTCT_ACTIONS 0x94 +#define QPHY_PCIE_V6_PCS_LANE1_INSIG_SW_CTRL2 0x024 +#define QPHY_PCIE_V6_PCS_LANE1_INSIG_MX_CTRL2 0x028 + #endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v2.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v2.h index bf36399d0057..1ecf4b5beba6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v2.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v2.h @@ -34,6 +34,7 @@ #define QPHY_V2_PCS_USB_PCS_STATUS 0x17c /* USB */ #define QPHY_V2_PCS_PLL_LOCK_CHK_DLY_TIME_AUXCLK_LSB 0x1a8 #define QPHY_V2_PCS_OSC_DTCT_ACTIONS 0x1ac +#define QPHY_V2_PCS_SIGDET_CNTRL 0x1b0 #define QPHY_V2_PCS_RX_SIGDET_LVL 0x1d8 #define QPHY_V2_PCS_L1SS_WAKEUP_DLY_TIME_AUXCLK_LSB 0x1dc #define 
QPHY_V2_PCS_L1SS_WAKEUP_DLY_TIME_AUXCLK_MSB 0x1e0 diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6.h index 08299d2b78f0..aa5afb921f12 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6.h @@ -17,6 +17,8 @@ #define QPHY_V6_PCS_LOCK_DETECT_CONFIG3 0x0cc #define QPHY_V6_PCS_LOCK_DETECT_CONFIG6 0x0d8 #define QPHY_V6_PCS_REFGEN_REQ_CONFIG1 0x0dc +#define QPHY_V6_PCS_G12S1_TXDEEMPH_M6DB 0x168 +#define QPHY_V6_PCS_G3S2_PRE_GAIN 0x170 #define QPHY_V6_PCS_RX_SIGDET_LVL 0x188 #define QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_L 0x190 #define QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_H 0x194 diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6.h index 23ffcfae9efa..f47fdc9cecda 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6.h @@ -6,6 +6,7 @@ #ifndef QCOM_PHY_QMP_QSERDES_TXRX_USB_V6_H_ #define QCOM_PHY_QMP_QSERDES_TXRX_USB_V6_H_ +#define QSERDES_V6_TX_BIST_MODE_LANENO 0x00 #define QSERDES_V6_TX_CLKBUF_ENABLE 0x08 #define QSERDES_V6_TX_TX_EMP_POST1_LVL 0x0c #define QSERDES_V6_TX_TX_DRV_LVL 0x14 diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index c9c337840715..787721570457 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2298,6 +2298,9 @@ err_node_put: static const struct of_device_id qmp_usb_of_match_table[] = { { + .compatible = "qcom,ipq5424-qmp-usb3-phy", + .data = &ipq9574_usb3phy_cfg, + }, { .compatible = "qcom,ipq6018-qmp-usb3-phy", .data = &ipq6018_usb3phy_cfg, }, { diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c index c52655a383ce..1f5f7df14d5a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c @@ -151,6 +151,34 @@ static const struct qusb2_phy_init_tbl ipq6018_init_tbl[] = { QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_AUTOPGM_CTL1, 0x9F), }; +static const struct qusb2_phy_init_tbl ipq5424_init_tbl[] = { + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL, 0x14), + QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE1, 0x00), + QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE2, 0x53), + QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE4, 0xc3), + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TUNE, 0x30), + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL1, 0x79), + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL2, 0x21), + QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE5, 0x00), + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_PWR_CTRL, 0x00), + QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TEST2, 0x14), + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TEST, 0x80), + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_AUTOPGM_CTL1, 0x9f), +}; + +static const struct qusb2_phy_init_tbl qcs615_init_tbl[] = { + QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE1, 0xc8), + QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE2, 0xb3), + QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE3, 0x83), + QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE4, 0xc0), + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TUNE, 0x30), + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL1, 0x79), + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL2, 0x21), + QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TEST2, 0x14), + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_AUTOPGM_CTL1, 0x9f), + QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_PWR_CTRL, 0x00), +}; + static const unsigned int ipq6018_regs_layout[] = { [QUSB2PHY_PLL_STATUS] = 0x38, [QUSB2PHY_PORT_TUNE1] = 0x80, @@ -331,6 +359,27 @@ static const struct qusb2_phy_cfg ipq6018_phy_cfg = { .autoresume_en = BIT(0), }; +static const struct 
qusb2_phy_cfg ipq5424_phy_cfg = { + .tbl = ipq5424_init_tbl, + .tbl_num = ARRAY_SIZE(ipq5424_init_tbl), + .regs = ipq6018_regs_layout, + + .disable_ctrl = POWER_DOWN, + .mask_core_ready = PLL_LOCKED, + .autoresume_en = BIT(0), +}; + +static const struct qusb2_phy_cfg qcs615_phy_cfg = { + .tbl = qcs615_init_tbl, + .tbl_num = ARRAY_SIZE(qcs615_init_tbl), + .regs = ipq6018_regs_layout, + + .disable_ctrl = (CLAMP_N_EN | FREEZIO_N | POWER_DOWN), + .mask_core_ready = PLL_LOCKED, + /* autoresume not used */ + .autoresume_en = BIT(0), +}; + static const struct qusb2_phy_cfg qusb2_v2_phy_cfg = { .tbl = qusb2_v2_init_tbl, .tbl_num = ARRAY_SIZE(qusb2_v2_init_tbl), @@ -905,6 +954,9 @@ static const struct phy_ops qusb2_phy_gen_ops = { static const struct of_device_id qusb2_phy_of_match_table[] = { { + .compatible = "qcom,ipq5424-qusb2-phy", + .data = &ipq5424_phy_cfg, + }, { .compatible = "qcom,ipq6018-qusb2-phy", .data = &ipq6018_phy_cfg, }, { @@ -923,6 +975,9 @@ static const struct of_device_id qusb2_phy_of_match_table[] = { .compatible = "qcom,msm8998-qusb2-phy", .data = &msm8998_phy_cfg, }, { + .compatible = "qcom,qcs615-qusb2-phy", + .data = &qcs615_phy_cfg, + }, { .compatible = "qcom,qcm2290-qusb2-phy", .data = &sm6115_phy_cfg, }, { diff --git a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c index 2eb3329ca23f..a1532ef8bbe9 100644 --- a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c +++ b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c @@ -37,6 +37,10 @@ #define PHYREG8 0x1C #define PHYREG8_SSC_EN BIT(4) +#define PHYREG10 0x24 +#define PHYREG10_SSC_PCM_MASK GENMASK(3, 0) +#define PHYREG10_SSC_PCM_3500PPM 7 + #define PHYREG11 0x28 #define PHYREG11_SU_TRIM_0_7 0xF0 @@ -61,17 +65,26 @@ #define PHYREG16 0x3C #define PHYREG16_SSC_CNT_VALUE 0x5f +#define PHYREG17 0x40 + #define PHYREG18 0x44 #define PHYREG18_PLL_LOOP 0x32 +#define PHYREG21 0x50 +#define PHYREG21_RX_SQUELCH_VAL 0x0D + #define PHYREG27 0x6C #define PHYREG27_RX_TRIM_RK3588 0x4C +#define PHYREG30 0x74 + #define PHYREG32 0x7C #define PHYREG32_SSC_MASK GENMASK(7, 4) +#define PHYREG32_SSC_DIR_MASK GENMASK(5, 4) #define PHYREG32_SSC_DIR_SHIFT 4 #define PHYREG32_SSC_UPWARD 0 #define PHYREG32_SSC_DOWNWARD 1 +#define PHYREG32_SSC_OFFSET_MASK GENMASK(7, 6) #define PHYREG32_SSC_OFFSET_SHIFT 6 #define PHYREG32_SSC_OFFSET_500PPM 1 @@ -79,6 +92,7 @@ #define PHYREG33_PLL_KVCO_MASK GENMASK(4, 2) #define PHYREG33_PLL_KVCO_SHIFT 2 #define PHYREG33_PLL_KVCO_VALUE 2 +#define PHYREG33_PLL_KVCO_VALUE_RK3576 4 struct rockchip_combphy_priv; @@ -98,6 +112,7 @@ struct rockchip_combphy_grfcfg { struct combphy_reg pipe_rxterm_set; struct combphy_reg pipe_txelec_set; struct combphy_reg pipe_txcomp_set; + struct combphy_reg pipe_clk_24m; struct combphy_reg pipe_clk_25m; struct combphy_reg pipe_clk_100m; struct combphy_reg pipe_phymode_sel; @@ -584,6 +599,266 @@ static const struct rockchip_combphy_cfg rk3568_combphy_cfgs = { .combphy_cfg = rk3568_combphy_cfg, }; +static int rk3576_combphy_cfg(struct rockchip_combphy_priv *priv) +{ + const struct rockchip_combphy_grfcfg *cfg = priv->cfg->grfcfg; + unsigned long rate; + u32 val; + + switch (priv->type) { + case PHY_TYPE_PCIE: + /* Set SSC downward spread spectrum */ + val = FIELD_PREP(PHYREG32_SSC_MASK, PHYREG32_SSC_DOWNWARD); + rockchip_combphy_updatel(priv, PHYREG32_SSC_MASK, val, PHYREG32); + + rockchip_combphy_param_write(priv->phy_grf, &cfg->con0_for_pcie, true); + rockchip_combphy_param_write(priv->phy_grf, &cfg->con1_for_pcie, true); + 
rockchip_combphy_param_write(priv->phy_grf, &cfg->con2_for_pcie, true); + rockchip_combphy_param_write(priv->phy_grf, &cfg->con3_for_pcie, true); + break; + + case PHY_TYPE_USB3: + /* Set SSC downward spread spectrum */ + val = FIELD_PREP(PHYREG32_SSC_MASK, PHYREG32_SSC_DOWNWARD); + rockchip_combphy_updatel(priv, PHYREG32_SSC_MASK, val, PHYREG32); + + /* Enable adaptive CTLE for USB3.0 Rx */ + val = readl(priv->mmio + PHYREG15); + val |= PHYREG15_CTLE_EN; + writel(val, priv->mmio + PHYREG15); + + /* Set PLL KVCO fine tuning signals */ + rockchip_combphy_updatel(priv, PHYREG33_PLL_KVCO_MASK, BIT(3), PHYREG33); + + /* Set PLL LPF R1 to su_trim[10:7]=1001 */ + writel(PHYREG12_PLL_LPF_ADJ_VALUE, priv->mmio + PHYREG12); + + /* Set PLL input clock divider 1/2 */ + val = FIELD_PREP(PHYREG6_PLL_DIV_MASK, PHYREG6_PLL_DIV_2); + rockchip_combphy_updatel(priv, PHYREG6_PLL_DIV_MASK, val, PHYREG6); + + /* Set PLL loop divider */ + writel(PHYREG18_PLL_LOOP, priv->mmio + PHYREG18); + + /* Set PLL KVCO to min and set PLL charge pump current to max */ + writel(PHYREG11_SU_TRIM_0_7, priv->mmio + PHYREG11); + + /* Set Rx squelch input filler bandwidth */ + writel(PHYREG21_RX_SQUELCH_VAL, priv->mmio + PHYREG21); + + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_txcomp_sel, false); + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_txelec_sel, false); + rockchip_combphy_param_write(priv->phy_grf, &cfg->usb_mode_set, true); + break; + + case PHY_TYPE_SATA: + /* Enable adaptive CTLE for SATA Rx */ + val = readl(priv->mmio + PHYREG15); + val |= PHYREG15_CTLE_EN; + writel(val, priv->mmio + PHYREG15); + + /* Set tx_rterm = 50 ohm and rx_rterm = 43.5 ohm */ + val = PHYREG7_TX_RTERM_50OHM << PHYREG7_TX_RTERM_SHIFT; + val |= PHYREG7_RX_RTERM_44OHM << PHYREG7_RX_RTERM_SHIFT; + writel(val, priv->mmio + PHYREG7); + + rockchip_combphy_param_write(priv->phy_grf, &cfg->con0_for_sata, true); + rockchip_combphy_param_write(priv->phy_grf, &cfg->con1_for_sata, true); + rockchip_combphy_param_write(priv->phy_grf, &cfg->con2_for_sata, true); + rockchip_combphy_param_write(priv->phy_grf, &cfg->con3_for_sata, true); + rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_con0_for_sata, true); + rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_con1_for_sata, true); + break; + + default: + dev_err(priv->dev, "incompatible PHY type\n"); + return -EINVAL; + } + + rate = clk_get_rate(priv->refclk); + + switch (rate) { + case REF_CLOCK_24MHz: + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_clk_24m, true); + if (priv->type == PHY_TYPE_USB3 || priv->type == PHY_TYPE_SATA) { + /* Set ssc_cnt[9:0]=0101111101 & 31.5KHz */ + val = FIELD_PREP(PHYREG15_SSC_CNT_MASK, PHYREG15_SSC_CNT_VALUE); + rockchip_combphy_updatel(priv, PHYREG15_SSC_CNT_MASK, + val, PHYREG15); + + writel(PHYREG16_SSC_CNT_VALUE, priv->mmio + PHYREG16); + } else if (priv->type == PHY_TYPE_PCIE) { + /* PLL KVCO tuning fine */ + val = FIELD_PREP(PHYREG33_PLL_KVCO_MASK, PHYREG33_PLL_KVCO_VALUE_RK3576); + rockchip_combphy_updatel(priv, PHYREG33_PLL_KVCO_MASK, + val, PHYREG33); + + /* Set up rx_pck invert and rx msb to disable */ + writel(0x00, priv->mmio + PHYREG27); + + /* + * Set up SU adjust signal: + * su_trim[7:0], PLL KVCO adjust bits[2:0] to min + * su_trim[15:8], PLL LPF R1 adujst bits[9:7]=3'b011 + * su_trim[31:24], CKDRV adjust + */ + writel(0x90, priv->mmio + PHYREG11); + writel(0x02, priv->mmio + PHYREG12); + writel(0x57, priv->mmio + PHYREG14); + + writel(PHYREG16_SSC_CNT_VALUE, priv->mmio + PHYREG16); + } + break; + + case 
REF_CLOCK_25MHz: + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_clk_25m, true); + break; + + case REF_CLOCK_100MHz: + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_clk_100m, true); + if (priv->type == PHY_TYPE_PCIE) { + /* gate_tx_pck_sel length select work for L1SS */ + writel(0xc0, priv->mmio + PHYREG30); + + /* PLL KVCO tuning fine */ + val = FIELD_PREP(PHYREG33_PLL_KVCO_MASK, PHYREG33_PLL_KVCO_VALUE_RK3576); + rockchip_combphy_updatel(priv, PHYREG33_PLL_KVCO_MASK, + val, PHYREG33); + + /* Set up rx_trim: PLL LPF C1 85pf R1 1.25kohm */ + writel(0x4c, priv->mmio + PHYREG27); + + /* + * Set up SU adjust signal: + * su_trim[7:0], PLL KVCO adjust bits[2:0] to min + * su_trim[15:8], bypass PLL loop divider code, and + * PLL LPF R1 adujst bits[9:7]=3'b101 + * su_trim[23:16], CKRCV adjust + * su_trim[31:24], CKDRV adjust + */ + writel(0x90, priv->mmio + PHYREG11); + writel(0x43, priv->mmio + PHYREG12); + writel(0x88, priv->mmio + PHYREG13); + writel(0x56, priv->mmio + PHYREG14); + } else if (priv->type == PHY_TYPE_SATA) { + /* downward spread spectrum +500ppm */ + val = FIELD_PREP(PHYREG32_SSC_DIR_MASK, PHYREG32_SSC_DOWNWARD); + val |= FIELD_PREP(PHYREG32_SSC_OFFSET_MASK, PHYREG32_SSC_OFFSET_500PPM); + rockchip_combphy_updatel(priv, PHYREG32_SSC_MASK, val, PHYREG32); + + /* ssc ppm adjust to 3500ppm */ + rockchip_combphy_updatel(priv, PHYREG10_SSC_PCM_MASK, + PHYREG10_SSC_PCM_3500PPM, + PHYREG10); + } + break; + + default: + dev_err(priv->dev, "Unsupported rate: %lu\n", rate); + return -EINVAL; + } + + if (priv->ext_refclk) { + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_clk_ext, true); + if (priv->type == PHY_TYPE_PCIE && rate == REF_CLOCK_100MHz) { + val = FIELD_PREP(PHYREG33_PLL_KVCO_MASK, PHYREG33_PLL_KVCO_VALUE_RK3576); + rockchip_combphy_updatel(priv, PHYREG33_PLL_KVCO_MASK, + val, PHYREG33); + + /* Set up rx_trim: PLL LPF C1 85pf R1 2.5kohm */ + writel(0x0c, priv->mmio + PHYREG27); + + /* + * Set up SU adjust signal: + * su_trim[7:0], PLL KVCO adjust bits[2:0] to min + * su_trim[15:8], bypass PLL loop divider code, and + * PLL LPF R1 adujst bits[9:7]=3'b101. 
+ * su_trim[23:16], CKRCV adjust + * su_trim[31:24], CKDRV adjust + */ + writel(0x90, priv->mmio + PHYREG11); + writel(0x43, priv->mmio + PHYREG12); + writel(0x88, priv->mmio + PHYREG13); + writel(0x56, priv->mmio + PHYREG14); + } + } + + if (priv->enable_ssc) { + val = readl(priv->mmio + PHYREG8); + val |= PHYREG8_SSC_EN; + writel(val, priv->mmio + PHYREG8); + + if (priv->type == PHY_TYPE_PCIE && rate == REF_CLOCK_24MHz) { + /* Set PLL loop divider */ + writel(0x00, priv->mmio + PHYREG17); + writel(PHYREG18_PLL_LOOP, priv->mmio + PHYREG18); + + /* Set up rx_pck invert and rx msb to disable */ + writel(0x00, priv->mmio + PHYREG27); + + /* + * Set up SU adjust signal: + * su_trim[7:0], PLL KVCO adjust bits[2:0] to min + * su_trim[15:8], PLL LPF R1 adujst bits[9:7]=3'b101 + * su_trim[23:16], CKRCV adjust + * su_trim[31:24], CKDRV adjust + */ + writel(0x90, priv->mmio + PHYREG11); + writel(0x02, priv->mmio + PHYREG12); + writel(0x08, priv->mmio + PHYREG13); + writel(0x57, priv->mmio + PHYREG14); + writel(0x40, priv->mmio + PHYREG15); + + writel(PHYREG16_SSC_CNT_VALUE, priv->mmio + PHYREG16); + + val = FIELD_PREP(PHYREG33_PLL_KVCO_MASK, PHYREG33_PLL_KVCO_VALUE_RK3576); + writel(val, priv->mmio + PHYREG33); + } + } + + return 0; +} + +static const struct rockchip_combphy_grfcfg rk3576_combphy_grfcfgs = { + /* pipe-phy-grf */ + .pcie_mode_set = { 0x0000, 5, 0, 0x00, 0x11 }, + .usb_mode_set = { 0x0000, 5, 0, 0x00, 0x04 }, + .pipe_rxterm_set = { 0x0000, 12, 12, 0x00, 0x01 }, + .pipe_txelec_set = { 0x0004, 1, 1, 0x00, 0x01 }, + .pipe_txcomp_set = { 0x0004, 4, 4, 0x00, 0x01 }, + .pipe_clk_24m = { 0x0004, 14, 13, 0x00, 0x00 }, + .pipe_clk_25m = { 0x0004, 14, 13, 0x00, 0x01 }, + .pipe_clk_100m = { 0x0004, 14, 13, 0x00, 0x02 }, + .pipe_phymode_sel = { 0x0008, 1, 1, 0x00, 0x01 }, + .pipe_rate_sel = { 0x0008, 2, 2, 0x00, 0x01 }, + .pipe_rxterm_sel = { 0x0008, 8, 8, 0x00, 0x01 }, + .pipe_txelec_sel = { 0x0008, 12, 12, 0x00, 0x01 }, + .pipe_txcomp_sel = { 0x0008, 15, 15, 0x00, 0x01 }, + .pipe_clk_ext = { 0x000c, 9, 8, 0x02, 0x01 }, + .pipe_phy_status = { 0x0034, 6, 6, 0x01, 0x00 }, + .con0_for_pcie = { 0x0000, 15, 0, 0x00, 0x1000 }, + .con1_for_pcie = { 0x0004, 15, 0, 0x00, 0x0000 }, + .con2_for_pcie = { 0x0008, 15, 0, 0x00, 0x0101 }, + .con3_for_pcie = { 0x000c, 15, 0, 0x00, 0x0200 }, + .con0_for_sata = { 0x0000, 15, 0, 0x00, 0x0129 }, + .con1_for_sata = { 0x0004, 15, 0, 0x00, 0x0000 }, + .con2_for_sata = { 0x0008, 15, 0, 0x00, 0x80c1 }, + .con3_for_sata = { 0x000c, 15, 0, 0x00, 0x0407 }, + /* php-grf */ + .pipe_con0_for_sata = { 0x001C, 2, 0, 0x00, 0x2 }, + .pipe_con1_for_sata = { 0x0020, 2, 0, 0x00, 0x2 }, +}; + +static const struct rockchip_combphy_cfg rk3576_combphy_cfgs = { + .num_phys = 2, + .phy_ids = { + 0x2b050000, + 0x2b060000 + }, + .grfcfg = &rk3576_combphy_grfcfgs, + .combphy_cfg = rk3576_combphy_cfg, +}; + static int rk3588_combphy_cfg(struct rockchip_combphy_priv *priv) { const struct rockchip_combphy_grfcfg *cfg = priv->cfg->grfcfg; @@ -776,6 +1051,10 @@ static const struct of_device_id rockchip_combphy_of_match[] = { .data = &rk3568_combphy_cfgs, }, { + .compatible = "rockchip,rk3576-naneng-combphy", + .data = &rk3576_combphy_cfgs, + }, + { .compatible = "rockchip,rk3588-naneng-combphy", .data = &rk3588_combphy_cfgs, }, diff --git a/drivers/phy/rockchip/phy-rockchip-pcie.c b/drivers/phy/rockchip/phy-rockchip-pcie.c index 51cc5ece0e63..bd44af36c67a 100644 --- a/drivers/phy/rockchip/phy-rockchip-pcie.c +++ b/drivers/phy/rockchip/phy-rockchip-pcie.c @@ -124,7 +124,7 @@ static int 
rockchip_pcie_phy_power_off(struct phy *phy) struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst); int err = 0; - mutex_lock(&rk_phy->pcie_mutex); + guard(mutex)(&rk_phy->pcie_mutex); regmap_write(rk_phy->reg_base, rk_phy->phy_data->pcie_laneoff, @@ -132,27 +132,22 @@ static int rockchip_pcie_phy_power_off(struct phy *phy) PHY_LANE_IDLE_MASK, PHY_LANE_IDLE_A_SHIFT + inst->index)); - if (--rk_phy->pwr_cnt) - goto err_out; + if (--rk_phy->pwr_cnt) { + return 0; + } err = reset_control_assert(rk_phy->phy_rst); if (err) { dev_err(&phy->dev, "assert phy_rst err %d\n", err); - goto err_restore; + rk_phy->pwr_cnt++; + regmap_write(rk_phy->reg_base, + rk_phy->phy_data->pcie_laneoff, + HIWORD_UPDATE(!PHY_LANE_IDLE_OFF, + PHY_LANE_IDLE_MASK, + PHY_LANE_IDLE_A_SHIFT + inst->index)); + return err; } -err_out: - mutex_unlock(&rk_phy->pcie_mutex); - return 0; - -err_restore: - rk_phy->pwr_cnt++; - regmap_write(rk_phy->reg_base, - rk_phy->phy_data->pcie_laneoff, - HIWORD_UPDATE(!PHY_LANE_IDLE_OFF, - PHY_LANE_IDLE_MASK, - PHY_LANE_IDLE_A_SHIFT + inst->index)); - mutex_unlock(&rk_phy->pcie_mutex); return err; } @@ -162,17 +157,18 @@ static int rockchip_pcie_phy_power_on(struct phy *phy) struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst); int err = 0; u32 status; - unsigned long timeout; - mutex_lock(&rk_phy->pcie_mutex); + guard(mutex)(&rk_phy->pcie_mutex); - if (rk_phy->pwr_cnt++) - goto err_out; + if (rk_phy->pwr_cnt++) { + return 0; + } err = reset_control_deassert(rk_phy->phy_rst); if (err) { dev_err(&phy->dev, "deassert phy_rst err %d\n", err); - goto err_pwr_cnt; + rk_phy->pwr_cnt--; + return err; } regmap_write(rk_phy->reg_base, rk_phy->phy_data->pcie_conf, @@ -191,21 +187,11 @@ static int rockchip_pcie_phy_power_on(struct phy *phy) * so we make it large enough here. And we use loop-break * method which should not be harmful. 
*/ - timeout = jiffies + msecs_to_jiffies(1000); - - err = -EINVAL; - while (time_before(jiffies, timeout)) { - regmap_read(rk_phy->reg_base, - rk_phy->phy_data->pcie_status, - &status); - if (status & PHY_PLL_LOCKED) { - dev_dbg(&phy->dev, "pll locked!\n"); - err = 0; - break; - } - msleep(20); - } - + err = regmap_read_poll_timeout(rk_phy->reg_base, + rk_phy->phy_data->pcie_status, + status, + status & PHY_PLL_LOCKED, + 200, 100000); if (err) { dev_err(&phy->dev, "pll lock timeout!\n"); goto err_pll_lock; @@ -214,19 +200,11 @@ static int rockchip_pcie_phy_power_on(struct phy *phy) phy_wr_cfg(rk_phy, PHY_CFG_CLK_TEST, PHY_CFG_SEPE_RATE); phy_wr_cfg(rk_phy, PHY_CFG_CLK_SCC, PHY_CFG_PLL_100M); - err = -ETIMEDOUT; - while (time_before(jiffies, timeout)) { - regmap_read(rk_phy->reg_base, - rk_phy->phy_data->pcie_status, - &status); - if (!(status & PHY_PLL_OUTPUT)) { - dev_dbg(&phy->dev, "pll output enable done!\n"); - err = 0; - break; - } - msleep(20); - } - + err = regmap_read_poll_timeout(rk_phy->reg_base, + rk_phy->phy_data->pcie_status, + status, + !(status & PHY_PLL_OUTPUT), + 200, 100000); if (err) { dev_err(&phy->dev, "pll output enable timeout!\n"); goto err_pll_lock; @@ -236,33 +214,22 @@ static int rockchip_pcie_phy_power_on(struct phy *phy) HIWORD_UPDATE(PHY_CFG_PLL_LOCK, PHY_CFG_ADDR_MASK, PHY_CFG_ADDR_SHIFT)); - err = -EINVAL; - while (time_before(jiffies, timeout)) { - regmap_read(rk_phy->reg_base, - rk_phy->phy_data->pcie_status, - &status); - if (status & PHY_PLL_LOCKED) { - dev_dbg(&phy->dev, "pll relocked!\n"); - err = 0; - break; - } - msleep(20); - } + err = regmap_read_poll_timeout(rk_phy->reg_base, + rk_phy->phy_data->pcie_status, + status, + status & PHY_PLL_LOCKED, + 200, 100000); if (err) { dev_err(&phy->dev, "pll relock timeout!\n"); goto err_pll_lock; } -err_out: - mutex_unlock(&rk_phy->pcie_mutex); - return 0; + return err; err_pll_lock: reset_control_assert(rk_phy->phy_rst); -err_pwr_cnt: rk_phy->pwr_cnt--; - mutex_unlock(&rk_phy->pcie_mutex); return err; } @@ -272,33 +239,19 @@ static int rockchip_pcie_phy_init(struct phy *phy) struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst); int err = 0; - mutex_lock(&rk_phy->pcie_mutex); - - if (rk_phy->init_cnt++) - goto err_out; + guard(mutex)(&rk_phy->pcie_mutex); - err = clk_prepare_enable(rk_phy->clk_pciephy_ref); - if (err) { - dev_err(&phy->dev, "Fail to enable pcie ref clock.\n"); - goto err_refclk; + if (rk_phy->init_cnt++) { + return 0; } err = reset_control_assert(rk_phy->phy_rst); if (err) { dev_err(&phy->dev, "assert phy_rst err %d\n", err); - goto err_reset; + rk_phy->init_cnt--; + return err; } -err_out: - mutex_unlock(&rk_phy->pcie_mutex); - return 0; - -err_reset: - - clk_disable_unprepare(rk_phy->clk_pciephy_ref); -err_refclk: - rk_phy->init_cnt--; - mutex_unlock(&rk_phy->pcie_mutex); return err; } @@ -307,15 +260,12 @@ static int rockchip_pcie_phy_exit(struct phy *phy) struct phy_pcie_instance *inst = phy_get_drvdata(phy); struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst); - mutex_lock(&rk_phy->pcie_mutex); + guard(mutex)(&rk_phy->pcie_mutex); if (--rk_phy->init_cnt) goto err_init_cnt; - clk_disable_unprepare(rk_phy->clk_pciephy_ref); - err_init_cnt: - mutex_unlock(&rk_phy->pcie_mutex); return 0; } @@ -371,18 +321,14 @@ static int rockchip_pcie_phy_probe(struct platform_device *pdev) mutex_init(&rk_phy->pcie_mutex); rk_phy->phy_rst = devm_reset_control_get(dev, "phy"); - if (IS_ERR(rk_phy->phy_rst)) { - if (PTR_ERR(rk_phy->phy_rst) != -EPROBE_DEFER) - dev_err(dev, - "missing phy property for reset 
controller\n"); - return PTR_ERR(rk_phy->phy_rst); - } - - rk_phy->clk_pciephy_ref = devm_clk_get(dev, "refclk"); - if (IS_ERR(rk_phy->clk_pciephy_ref)) { - dev_err(dev, "refclk not found.\n"); - return PTR_ERR(rk_phy->clk_pciephy_ref); - } + if (IS_ERR(rk_phy->phy_rst)) + return dev_err_probe(&pdev->dev, PTR_ERR(rk_phy->phy_rst), + "missing phy property for reset controller\n"); + + rk_phy->clk_pciephy_ref = devm_clk_get_enabled(dev, "refclk"); + if (IS_ERR(rk_phy->clk_pciephy_ref)) + return dev_err_probe(&pdev->dev, PTR_ERR(rk_phy->clk_pciephy_ref), + "failed to get phyclk\n"); /* parse #phy-cells to see if it's legacy PHY model */ if (of_property_read_u32(dev->of_node, "#phy-cells", &phy_num)) diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c index 122ae0fdc785..d9701b6106d5 100644 --- a/drivers/phy/rockchip/phy-rockchip-typec.c +++ b/drivers/phy/rockchip/phy-rockchip-typec.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author: Chris Zhong <zyw@rock-chips.com> * Kever Yang <kever.yang@rock-chips.com> * diff --git a/drivers/phy/samsung/Kconfig b/drivers/phy/samsung/Kconfig index f10afa3d7ff5..e2330b0894d6 100644 --- a/drivers/phy/samsung/Kconfig +++ b/drivers/phy/samsung/Kconfig @@ -33,6 +33,7 @@ config PHY_SAMSUNG_UFS tristate "Exynos SoC series UFS PHY driver" depends on OF && (ARCH_EXYNOS || COMPILE_TEST) select GENERIC_PHY + select MFD_SYSCON help Enable this to support the Samsung Exynos SoC UFS PHY driver for Samsung Exynos SoCs. This driver provides the interface for UFS host diff --git a/drivers/phy/samsung/phy-samsung-ufs.c b/drivers/phy/samsung/phy-samsung-ufs.c index 6c5d41552649..8e9ccd39f97e 100644 --- a/drivers/phy/samsung/phy-samsung-ufs.c +++ b/drivers/phy/samsung/phy-samsung-ufs.c @@ -13,11 +13,11 @@ #include <linux/of.h> #include <linux/io.h> #include <linux/iopoll.h> +#include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/phy/phy.h> #include <linux/platform_device.h> #include <linux/regmap.h> -#include <linux/soc/samsung/exynos-pmu.h> #include "phy-samsung-ufs.h" @@ -268,8 +268,8 @@ static int samsung_ufs_phy_probe(struct platform_device *pdev) goto out; } - phy->reg_pmu = exynos_get_pmu_regmap_by_phandle(dev->of_node, - "samsung,pmu-syscon"); + phy->reg_pmu = syscon_regmap_lookup_by_phandle(dev->of_node, + "samsung,pmu-syscon"); if (IS_ERR(phy->reg_pmu)) { err = PTR_ERR(phy->reg_pmu); dev_err(dev, "failed syscon remap for pmu\n"); diff --git a/drivers/phy/tegra/Kconfig b/drivers/phy/tegra/Kconfig index c591c958f1eb..f30cfb42b210 100644 --- a/drivers/phy/tegra/Kconfig +++ b/drivers/phy/tegra/Kconfig @@ -13,7 +13,8 @@ config PHY_TEGRA_XUSB config PHY_TEGRA194_P2U tristate "NVIDIA Tegra194 PIPE2UPHY PHY driver" - depends on ARCH_TEGRA_194_SOC || COMPILE_TEST + depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || COMPILE_TEST select GENERIC_PHY help - Enable this to support the P2U (PIPE to UPHY) that is part of Tegra 19x SOCs. + Enable this to support the P2U (PIPE to UPHY) that is part of Tegra 19x + and 234 SOCs. 
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index ea96a14d72d1..bf6468c56419 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -4,6 +4,7 @@ * * Copyright (C) 2010 OMICRON electronics GmbH */ +#include <linux/compat.h> #include <linux/module.h> #include <linux/posix-clock.h> #include <linux/poll.h> @@ -176,6 +177,9 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, struct timespec64 ts; int enable, err = 0; + if (in_compat_syscall() && cmd != PTP_ENABLE_PPS && cmd != PTP_ENABLE_PPS2) + arg = (unsigned long)compat_ptr(arg); + tsevq = pccontext->private_clkdata; switch (cmd) { diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index b932425ddc6a..35a5994bf64f 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -217,6 +217,11 @@ static int ptp_getcycles64(struct ptp_clock_info *info, struct timespec64 *ts) return info->gettime64(info, ts); } +static int ptp_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on) +{ + return -EOPNOTSUPP; +} + static void ptp_aux_kworker(struct kthread_work *work) { struct ptp_clock *ptp = container_of(work, struct ptp_clock, @@ -294,6 +299,9 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, ptp->info->getcrosscycles = ptp->info->getcrosststamp; } + if (!ptp->info->enable) + ptp->info->enable = ptp_enable; + if (ptp->info->do_aux_work) { kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker); ptp->kworker = kthread_run_worker(0, "ptp%d", ptp->index); diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 99d0bc693315..ccd54c089bab 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -1285,7 +1285,7 @@ static int pwm_export_child(struct device *pwmchip_dev, struct pwm_device *pwm) return 0; } -static int pwm_unexport_match(struct device *pwm_dev, void *data) +static int pwm_unexport_match(struct device *pwm_dev, const void *data) { return pwm_from_dev(pwm_dev) == data; } diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 6c0ef1182248..89578b91c468 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5033,7 +5033,7 @@ int _regulator_bulk_get(struct device *dev, int num_consumers, consumers[i].supply, get_type); if (IS_ERR(consumers[i].consumer)) { ret = dev_err_probe(dev, PTR_ERR(consumers[i].consumer), - "Failed to get supply '%s'", + "Failed to get supply '%s'\n", consumers[i].supply); consumers[i].consumer = NULL; goto err; diff --git a/drivers/regulator/tps6287x-regulator.c b/drivers/regulator/tps6287x-regulator.c index 97f5ce138548..c0f5f0a186a3 100644 --- a/drivers/regulator/tps6287x-regulator.c +++ b/drivers/regulator/tps6287x-regulator.c @@ -44,10 +44,35 @@ static const unsigned int tps6287x_voltage_range_sel[] = { 0x0, 0x1, 0x2, 0x3 }; +static const unsigned int tps6287x_voltage_range_prefix[] = { + 0x000, 0x100, 0x200, 0x300 +}; + static const unsigned int tps6287x_ramp_table[] = { 10000, 5000, 1250, 500 }; +struct tps6287x_reg_data { + int range; +}; + +static int tps6287x_best_range(struct regulator_config *config, const struct regulator_desc *desc) +{ + const struct linear_range *r; + int i; + + if (!config->init_data->constraints.apply_uV) + return -1; + + for (i = 0; i < desc->n_linear_ranges; i++) { + r = &desc->linear_ranges[i]; + if (r->min <= config->init_data->constraints.min_uV && + config->init_data->constraints.max_uV <= linear_range_get_max_value(r)) + return i; + } + return -1; +} + static int tps6287x_set_mode(struct regulator_dev *rdev, 
unsigned int mode) { unsigned int val; @@ -91,6 +116,28 @@ static unsigned int tps6287x_of_map_mode(unsigned int mode) } } +static int tps6287x_map_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) +{ + struct tps6287x_reg_data *data = (struct tps6287x_reg_data *)rdev->reg_data; + struct linear_range selected_range; + int selector, voltage; + + if (!data || data->range == -1) + return regulator_map_voltage_pickable_linear_range(rdev, min_uV, max_uV); + + selected_range = rdev->desc->linear_ranges[data->range]; + selector = DIV_ROUND_UP(min_uV - selected_range.min, selected_range.step); + if (selector < selected_range.min_sel || selector > selected_range.max_sel) + return -EINVAL; + + selector |= tps6287x_voltage_range_prefix[data->range]; + voltage = rdev->desc->ops->list_voltage(rdev, selector); + if (voltage < min_uV || voltage > max_uV) + return -EINVAL; + + return selector; +} + static const struct regulator_ops tps6287x_regulator_ops = { .enable = regulator_enable_regmap, .disable = regulator_disable_regmap, @@ -100,6 +147,7 @@ static const struct regulator_ops tps6287x_regulator_ops = { .get_voltage_sel = regulator_get_voltage_sel_pickable_regmap, .set_voltage_sel = regulator_set_voltage_sel_pickable_regmap, .list_voltage = regulator_list_voltage_pickable_linear_range, + .map_voltage = tps6287x_map_voltage, .set_ramp_delay = regulator_set_ramp_delay_regmap, }; @@ -130,8 +178,14 @@ static int tps6287x_i2c_probe(struct i2c_client *i2c) { struct device *dev = &i2c->dev; struct regulator_config config = {}; + struct tps6287x_reg_data *reg_data; struct regulator_dev *rdev; + reg_data = devm_kzalloc(dev, sizeof(struct tps6287x_reg_data), GFP_KERNEL); + + if (!reg_data) + return -ENOMEM; + config.regmap = devm_regmap_init_i2c(i2c, &tps6287x_regmap_config); if (IS_ERR(config.regmap)) { dev_err(dev, "Failed to init i2c\n"); @@ -143,12 +197,15 @@ static int tps6287x_i2c_probe(struct i2c_client *i2c) config.init_data = of_get_regulator_init_data(dev, dev->of_node, &tps6287x_reg); + reg_data->range = tps6287x_best_range(&config, &tps6287x_reg); + rdev = devm_regulator_register(dev, &tps6287x_reg, &config); if (IS_ERR(rdev)) { dev_err(dev, "Failed to register regulator\n"); return PTR_ERR(rdev); } + rdev->reg_data = (void *)reg_data; dev_dbg(dev, "Probed regulator\n"); return 0; diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index 712c06c02696..207b64c0a2fe 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -377,9 +377,9 @@ EXPORT_SYMBOL(rpmsg_get_mtu); * this is used to make sure we're not creating rpmsg devices for channels * that already exist. 
*/ -static int rpmsg_device_match(struct device *dev, void *data) +static int rpmsg_device_match(struct device *dev, const void *data) { - struct rpmsg_channel_info *chinfo = data; + const struct rpmsg_channel_info *chinfo = data; struct rpmsg_device *rpdev = to_rpmsg_device(dev); if (chinfo->src != RPMSG_ADDR_ANY && chinfo->src != rpdev->src) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index a60bcc791a48..0bbbf778ecfa 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1316,7 +1316,7 @@ config RTC_DRV_SC27XX config RTC_DRV_SPEAR tristate "SPEAR ST RTC" depends on PLAT_SPEAR || COMPILE_TEST - default y + default PLAT_SPEAR help If you say Y here you will get support for the RTC found on spear diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c index 5c39cf252392..a3e52a5a708f 100644 --- a/drivers/rtc/rtc-88pm80x.c +++ b/drivers/rtc/rtc-88pm80x.c @@ -308,7 +308,7 @@ static int pm80x_rtc_probe(struct platform_device *pdev) /* remember whether this power up is caused by PMIC RTC or not */ info->rtc_dev->dev.platform_data = &pdata->rtc_wakeup; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; out_rtc: diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c index 814230d61842..964cd048fcdb 100644 --- a/drivers/rtc/rtc-88pm860x.c +++ b/drivers/rtc/rtc-88pm860x.c @@ -326,7 +326,7 @@ static int pm860x_rtc_probe(struct platform_device *pdev) schedule_delayed_work(&info->calib_work, VRTC_CALIB_INTERVAL); #endif /* VRTC_CALIBRATION */ - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; } diff --git a/drivers/rtc/rtc-amlogic-a4.c b/drivers/rtc/rtc-amlogic-a4.c index 2278b4c98a71..09d78c2cc691 100644 --- a/drivers/rtc/rtc-amlogic-a4.c +++ b/drivers/rtc/rtc-amlogic-a4.c @@ -361,7 +361,7 @@ static int aml_rtc_probe(struct platform_device *pdev) "failed to get_enable rtc sys clk\n"); aml_rtc_init(rtc); - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); platform_set_drvdata(pdev, rtc); rtc->rtc_dev = devm_rtc_allocate_device(dev); @@ -391,7 +391,7 @@ static int aml_rtc_probe(struct platform_device *pdev) return 0; err_clk: clk_disable_unprepare(rtc->sys_clk); - device_init_wakeup(dev, 0); + device_init_wakeup(dev, false); return ret; } @@ -426,7 +426,7 @@ static void aml_rtc_remove(struct platform_device *pdev) struct aml_rtc_data *rtc = dev_get_drvdata(&pdev->dev); clk_disable_unprepare(rtc->sys_clk); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); } static const struct aml_rtc_config a5_rtc_config = { diff --git a/drivers/rtc/rtc-armada38x.c b/drivers/rtc/rtc-armada38x.c index 569c1054d6b0..713fa0d077cd 100644 --- a/drivers/rtc/rtc-armada38x.c +++ b/drivers/rtc/rtc-armada38x.c @@ -527,7 +527,7 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); if (rtc->irq != -1) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); else clear_bit(RTC_FEATURE_ALARM, rtc->rtc_dev->features); diff --git a/drivers/rtc/rtc-as3722.c b/drivers/rtc/rtc-as3722.c index 0f21af27f4cf..9682d6457b7f 100644 --- a/drivers/rtc/rtc-as3722.c +++ b/drivers/rtc/rtc-as3722.c @@ -187,7 +187,7 @@ static int as3722_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); as3722_rtc->rtc = devm_rtc_device_register(&pdev->dev, "as3722-rtc", &as3722_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-at91rm9200.c 
b/drivers/rtc/rtc-at91rm9200.c index 9b3898b8de7c..f6b0102a843a 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -528,7 +528,7 @@ static int __init at91_rtc_probe(struct platform_device *pdev) * being wake-capable; if it didn't, do that here. */ if (!device_can_wakeup(&pdev->dev)) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); if (at91_rtc_config->has_correction) rtc->ops = &sama5d4_rtc_ops; diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c index 15b21da2788f..38991cca5930 100644 --- a/drivers/rtc/rtc-at91sam9.c +++ b/drivers/rtc/rtc-at91sam9.c @@ -353,7 +353,7 @@ static int at91_rtc_probe(struct platform_device *pdev) /* platform setup code should have handled this; sigh */ if (!device_can_wakeup(&pdev->dev)) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); platform_set_drvdata(pdev, rtc); diff --git a/drivers/rtc/rtc-cadence.c b/drivers/rtc/rtc-cadence.c index bf2a9a1fdea7..8634eea799ab 100644 --- a/drivers/rtc/rtc-cadence.c +++ b/drivers/rtc/rtc-cadence.c @@ -359,7 +359,7 @@ static void cdns_rtc_remove(struct platform_device *pdev) struct cdns_rtc *crtc = platform_get_drvdata(pdev); cdns_rtc_alarm_irq_enable(&pdev->dev, 0); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); clk_disable_unprepare(crtc->pclk); clk_disable_unprepare(crtc->ref_clk); diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 78f2ce12c75a..8172869bd3d7 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -151,11 +151,6 @@ static inline int hpet_set_periodic_freq(unsigned long freq) return 0; } -static inline int hpet_rtc_dropped_irq(void) -{ - return 0; -} - static inline int hpet_rtc_timer_init(void) { return 0; @@ -864,7 +859,7 @@ static void acpi_cmos_wake_setup(struct device *dev) dev_info(dev, "RTC can wake from S4\n"); /* RTC always wakes from S1/S2/S3, and often S4/STD */ - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); } static void cmos_check_acpi_rtc_status(struct device *dev, diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c index afc8fcba8f88..568a89e79c11 100644 --- a/drivers/rtc/rtc-cpcap.c +++ b/drivers/rtc/rtc-cpcap.c @@ -295,7 +295,7 @@ static int cpcap_rtc_probe(struct platform_device *pdev) } disable_irq(rtc->update_irq); - err = device_init_wakeup(dev, 1); + err = device_init_wakeup(dev, true); if (err) { dev_err(dev, "wakeup initialization failed (%d)\n", err); /* ignore error and continue without wakeup support */ diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c index 60a48c3ba3ca..865c2e82c7a5 100644 --- a/drivers/rtc/rtc-cros-ec.c +++ b/drivers/rtc/rtc-cros-ec.c @@ -337,7 +337,7 @@ static int cros_ec_rtc_probe(struct platform_device *pdev) return ret; } - ret = device_init_wakeup(&pdev->dev, 1); + ret = device_init_wakeup(&pdev->dev, true); if (ret) { dev_err(&pdev->dev, "failed to initialize wakeup\n"); return ret; diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c index 844168fcae1e..05adec6b77bf 100644 --- a/drivers/rtc/rtc-da9055.c +++ b/drivers/rtc/rtc-da9055.c @@ -288,7 +288,7 @@ static int da9055_rtc_probe(struct platform_device *pdev) if (ret & DA9055_RTC_ALM_EN) rtc->alarm_enable = 1; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &da9055_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c index dd37b055693c..19c09c418746 100644 --- 
a/drivers/rtc/rtc-ds3232.c +++ b/drivers/rtc/rtc-ds3232.c @@ -508,7 +508,7 @@ static int ds3232_probe(struct device *dev, struct regmap *regmap, int irq, return ret; if (ds3232->irq > 0) - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); ds3232_hwmon_register(dev, name); diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c index 7b82e4a14b7a..f71a6bb77b2a 100644 --- a/drivers/rtc/rtc-isl1208.c +++ b/drivers/rtc/rtc-isl1208.c @@ -830,7 +830,7 @@ static int isl1208_setup_irq(struct i2c_client *client, int irq) isl1208_driver.driver.name, client); if (!rc) { - device_init_wakeup(&client->dev, 1); + device_init_wakeup(&client->dev, true); enable_irq_wake(irq); } else { dev_err(&client->dev, diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c index bafa7d1b9b88..44bba356268c 100644 --- a/drivers/rtc/rtc-jz4740.c +++ b/drivers/rtc/rtc-jz4740.c @@ -367,7 +367,7 @@ static int jz4740_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); ret = dev_pm_set_wake_irq(dev, irq); if (ret) diff --git a/drivers/rtc/rtc-loongson.c b/drivers/rtc/rtc-loongson.c index 8d713e563d7c..97e5625c064c 100644 --- a/drivers/rtc/rtc-loongson.c +++ b/drivers/rtc/rtc-loongson.c @@ -114,6 +114,13 @@ static irqreturn_t loongson_rtc_isr(int irq, void *id) struct loongson_rtc_priv *priv = (struct loongson_rtc_priv *)id; rtc_update_irq(priv->rtcdev, 1, RTC_AF | RTC_IRQF); + + /* + * The TOY_MATCH0_REG should be cleared 0 here, + * otherwise the interrupt cannot be cleared. + */ + regmap_write(priv->regmap, TOY_MATCH0_REG, 0); + return IRQ_HANDLED; } @@ -131,11 +138,7 @@ static u32 loongson_rtc_handler(void *id) writel(RTC_STS, priv->pm_base + PM1_STS_REG); spin_unlock(&priv->lock); - /* - * The TOY_MATCH0_REG should be cleared 0 here, - * otherwise the interrupt cannot be cleared. - */ - return regmap_write(priv->regmap, TOY_MATCH0_REG, 0); + return ACPI_INTERRUPT_HANDLED; } static int loongson_rtc_set_enabled(struct device *dev) @@ -329,7 +332,7 @@ static int loongson_rtc_probe(struct platform_device *pdev) alarm_irq); priv->pm_base = regs - priv->config->pm_offset; - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); if (has_acpi_companion(dev)) acpi_install_fixed_event_handler(ACPI_EVENT_RTC, @@ -360,7 +363,7 @@ static void loongson_rtc_remove(struct platform_device *pdev) acpi_remove_fixed_event_handler(ACPI_EVENT_RTC, loongson_rtc_handler); - device_init_wakeup(dev, 0); + device_init_wakeup(dev, false); loongson_rtc_alarm_irq_enable(dev, 0); } diff --git a/drivers/rtc/rtc-lp8788.c b/drivers/rtc/rtc-lp8788.c index c0b8fbce1082..0793d70507f7 100644 --- a/drivers/rtc/rtc-lp8788.c +++ b/drivers/rtc/rtc-lp8788.c @@ -293,7 +293,7 @@ static int lp8788_rtc_probe(struct platform_device *pdev) rtc->alarm = lp->pdata ? 
lp->pdata->alarm_sel : DEFAULT_ALARM_SEL; platform_set_drvdata(pdev, rtc); - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); rtc->rdev = devm_rtc_device_register(dev, "lp8788_rtc", &lp8788_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c index 76ad7031a13d..74280bffe1b0 100644 --- a/drivers/rtc/rtc-lpc32xx.c +++ b/drivers/rtc/rtc-lpc32xx.c @@ -257,7 +257,7 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "Can't request interrupt.\n"); rtc->irq = -1; } else { - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); } } diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index a8f4b645c09d..7bb044d2ac25 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -770,7 +770,7 @@ static int max77686_rtc_probe(struct platform_device *pdev) goto err_rtc; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc_dev = devm_rtc_device_register(&pdev->dev, id->name, &max77686_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-max8925.c b/drivers/rtc/rtc-max8925.c index 64bb8ac6ef62..6ce8afbeac68 100644 --- a/drivers/rtc/rtc-max8925.c +++ b/drivers/rtc/rtc-max8925.c @@ -270,7 +270,7 @@ static int max8925_rtc_probe(struct platform_device *pdev) /* XXX - isn't this redundant? */ platform_set_drvdata(pdev, info); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8925-rtc", &max8925_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c index 20e50d9fdf88..e7618d715bd8 100644 --- a/drivers/rtc/rtc-max8997.c +++ b/drivers/rtc/rtc-max8997.c @@ -473,7 +473,7 @@ static int max8997_rtc_probe(struct platform_device *pdev) max8997_rtc_enable_wtsr(info, true); max8997_rtc_enable_smpl(info, true); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8997-rtc", &max8997_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-meson-vrtc.c b/drivers/rtc/rtc-meson-vrtc.c index 648fa362ec44..5849729f7d01 100644 --- a/drivers/rtc/rtc-meson-vrtc.c +++ b/drivers/rtc/rtc-meson-vrtc.c @@ -74,7 +74,7 @@ static int meson_vrtc_probe(struct platform_device *pdev) if (IS_ERR(vrtc->io_alarm)) return PTR_ERR(vrtc->io_alarm); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); platform_set_drvdata(pdev, vrtc); diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c index 600328131603..b90f8337a7e6 100644 --- a/drivers/rtc/rtc-mpc5121.c +++ b/drivers/rtc/rtc-mpc5121.c @@ -303,7 +303,7 @@ static int mpc5121_rtc_probe(struct platform_device *op) return PTR_ERR(rtc->regs); } - device_init_wakeup(&op->dev, 1); + device_init_wakeup(&op->dev, true); platform_set_drvdata(op, rtc); diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index 152699219a2b..6979d225a78e 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -286,7 +286,7 @@ static int mtk_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc->rtc_dev->ops = &mtk_rtc_ops; rtc->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_1900; diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c index 51029c536244..c27ad626d09f 100644 --- a/drivers/rtc/rtc-mv.c +++ b/drivers/rtc/rtc-mv.c @@ -264,7 +264,7 @@ static int __init mv_rtc_probe(struct platform_device *pdev) } if (pdata->irq >= 
0) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); else clear_bit(RTC_FEATURE_ALARM, pdata->rtc->features); @@ -287,7 +287,7 @@ static void __exit mv_rtc_remove(struct platform_device *pdev) struct rtc_plat_data *pdata = platform_get_drvdata(pdev); if (pdata->irq >= 0) - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); if (!IS_ERR(pdata->clk)) clk_disable_unprepare(pdata->clk); diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index dbb935dbbd8a..608db97d450c 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -377,7 +377,7 @@ static int mxc_rtc_probe(struct platform_device *pdev) } if (pdata->irq >= 0) { - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ret = dev_pm_set_wake_irq(&pdev->dev, pdata->irq); if (ret) dev_err(&pdev->dev, "failed to enable irq wake\n"); diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c index 13c041bb79f1..570f27af4732 100644 --- a/drivers/rtc/rtc-mxc_v2.c +++ b/drivers/rtc/rtc-mxc_v2.c @@ -302,7 +302,7 @@ static int mxc_rtc_probe(struct platform_device *pdev) if (pdata->irq < 0) return pdata->irq; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ret = dev_pm_set_wake_irq(&pdev->dev, pdata->irq); if (ret) dev_err(&pdev->dev, "failed to enable irq wake\n"); diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index c123778e2d9b..0f90065e352c 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -920,7 +920,7 @@ static void omap_rtc_remove(struct platform_device *pdev) omap_rtc_power_off_rtc = NULL; } - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); if (!IS_ERR(rtc->clk)) clk_disable_unprepare(rtc->clk); diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c index 7256a88b490c..aecada6bcf8b 100644 --- a/drivers/rtc/rtc-palmas.c +++ b/drivers/rtc/rtc-palmas.c @@ -287,7 +287,7 @@ static int palmas_rtc_probe(struct platform_device *pdev) palmas_rtc->irq = platform_get_irq(pdev, 0); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); palmas_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &palmas_rtc_ops, THIS_MODULE); if (IS_ERR(palmas_rtc->rtc)) { diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 9c04c4e1a49c..31c7dca8f469 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -20,6 +20,7 @@ #include <linux/i2c.h> #include <linux/spi/spi.h> #include <linux/bcd.h> +#include <linux/bitfield.h> #include <linux/rtc.h> #include <linux/slab.h> #include <linux/module.h> @@ -48,6 +49,7 @@ #define PCF2127_BIT_CTRL3_BLF BIT(2) #define PCF2127_BIT_CTRL3_BF BIT(3) #define PCF2127_BIT_CTRL3_BTSE BIT(4) +#define PCF2127_CTRL3_PM GENMASK(7, 5) /* Time and date registers */ #define PCF2127_REG_TIME_BASE 0x03 #define PCF2127_BIT_SC_OSF BIT(7) @@ -331,6 +333,84 @@ static int pcf2127_rtc_set_time(struct device *dev, struct rtc_time *tm) return 0; } +static int pcf2127_param_get(struct device *dev, struct rtc_param *param) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + u32 value; + int ret; + + switch (param->param) { + case RTC_PARAM_BACKUP_SWITCH_MODE: + ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL3, &value); + if (ret < 0) + return ret; + + value = FIELD_GET(PCF2127_CTRL3_PM, value); + + if (value < 0x3) + param->uvalue = RTC_BSM_LEVEL; + else if (value < 0x6) + param->uvalue = RTC_BSM_DIRECT; + else + param->uvalue = RTC_BSM_DISABLED; + + break; + + default: + return -EINVAL; + } + + 
return 0; +} + +static int pcf2127_param_set(struct device *dev, struct rtc_param *param) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + u8 mode = 0; + u32 value; + int ret; + + switch (param->param) { + case RTC_PARAM_BACKUP_SWITCH_MODE: + ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL3, &value); + if (ret < 0) + return ret; + + value = FIELD_GET(PCF2127_CTRL3_PM, value); + + if (value > 5) + value -= 5; + else if (value > 2) + value -= 3; + + switch (param->uvalue) { + case RTC_BSM_LEVEL: + break; + case RTC_BSM_DIRECT: + mode = 3; + break; + case RTC_BSM_DISABLED: + if (value == 0) + value = 1; + mode = 5; + break; + default: + return -EINVAL; + } + + return regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL3, + PCF2127_CTRL3_PM, + FIELD_PREP(PCF2127_CTRL3_PM, mode + value)); + + break; + + default: + return -EINVAL; + } + + return 0; +} + static int pcf2127_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) { @@ -741,6 +821,8 @@ static const struct rtc_class_ops pcf2127_rtc_ops = { .read_alarm = pcf2127_rtc_read_alarm, .set_alarm = pcf2127_rtc_set_alarm, .alarm_irq_enable = pcf2127_rtc_alarm_irq_enable, + .param_get = pcf2127_param_get, + .param_set = pcf2127_param_set, }; /* sysfs interface */ diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index fdbc07f14036..905986c61655 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -322,7 +322,16 @@ static const struct rtc_class_ops pcf85063_rtc_ops = { static int pcf85063_nvmem_read(void *priv, unsigned int offset, void *val, size_t bytes) { - return regmap_read(priv, PCF85063_REG_RAM, val); + unsigned int tmp; + int ret; + + ret = regmap_read(priv, PCF85063_REG_RAM, &tmp); + if (ret < 0) + return ret; + + *(u8 *)val = tmp; + + return 0; } static int pcf85063_nvmem_write(void *priv, unsigned int offset, diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c index bed3c27e665f..2812da2c50c5 100644 --- a/drivers/rtc/rtc-pic32.c +++ b/drivers/rtc/rtc-pic32.c @@ -330,7 +330,7 @@ static int pic32_rtc_probe(struct platform_device *pdev) pic32_rtc_enable(pdata, 1); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); pdata->rtc->ops = &pic32_rtcops; pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index 2f32187ecc8d..b2518aea4218 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -503,7 +503,7 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc_dd); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc_dd->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rtc_dd->rtc)) diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index 34d8545c8e15..62ee6b8f9bcd 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -360,7 +360,7 @@ static int __init pxa_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); return 0; } diff --git a/drivers/rtc/rtc-rc5t583.c b/drivers/rtc/rtc-rc5t583.c index eecb49bab56a..8ba9cda74acf 100644 --- a/drivers/rtc/rtc-rc5t583.c +++ b/drivers/rtc/rtc-rc5t583.c @@ -245,7 +245,7 @@ static int rc5t583_rtc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "IRQ is not free.\n"); return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ricoh_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &rc5t583_rtc_ops, THIS_MODULE); diff --git 
a/drivers/rtc/rtc-rc5t619.c b/drivers/rtc/rtc-rc5t619.c index 711f62eecd79..74d169102074 100644 --- a/drivers/rtc/rtc-rc5t619.c +++ b/drivers/rtc/rtc-rc5t619.c @@ -414,7 +414,7 @@ static int rc5t619_rtc_probe(struct platform_device *pdev) } else { /* enable wake */ - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); enable_irq_wake(rtc->irq); } } else { diff --git a/drivers/rtc/rtc-renesas-rtca3.c b/drivers/rtc/rtc-renesas-rtca3.c index d127933bfc8a..a056291d3887 100644 --- a/drivers/rtc/rtc-renesas-rtca3.c +++ b/drivers/rtc/rtc-renesas-rtca3.c @@ -768,7 +768,7 @@ static int rtca3_probe(struct platform_device *pdev) if (ret) return ret; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); priv->rtc_dev = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(priv->rtc_dev)) diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c index 2d9bcb3ce1e3..59b8e9a30fe6 100644 --- a/drivers/rtc/rtc-rk808.c +++ b/drivers/rtc/rtc-rk808.c @@ -418,7 +418,7 @@ static int rk808_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rk808_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rk808_rtc->rtc)) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index c0ac3bdb2f42..58c957eb753d 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -456,7 +456,7 @@ static int s3c_rtc_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "s3c2410_rtc: RTCCON=%02x\n", readw(info->base + S3C2410_RTCCON)); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(info->rtc)) { diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index dad294a0ce2a..36acca5b2639 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -729,7 +729,7 @@ static int s5m_rtc_probe(struct platform_device *pdev) info->irq, ret); return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); } return devm_rtc_register_device(info->rtc_dev); diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c index 13799b1abca1..1ad93648d69c 100644 --- a/drivers/rtc/rtc-sa1100.c +++ b/drivers/rtc/rtc-sa1100.c @@ -292,7 +292,7 @@ static int sa1100_rtc_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, info); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return sa1100_rtc_init(pdev, info); } diff --git a/drivers/rtc/rtc-sc27xx.c b/drivers/rtc/rtc-sc27xx.c index ce7a2ddbbc16..2b83561d4d28 100644 --- a/drivers/rtc/rtc-sc27xx.c +++ b/drivers/rtc/rtc-sc27xx.c @@ -613,14 +613,14 @@ static int sprd_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc->rtc->ops = &sprd_rtc_ops; rtc->rtc->range_min = 0; rtc->rtc->range_max = 5662310399LL; ret = devm_rtc_register_device(rtc->rtc); if (ret) { - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); return ret; } diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index a5df521876ba..9ea40f40188f 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -611,7 +611,7 @@ static int __init sh_rtc_probe(struct platform_device *pdev) if (ret) goto err_unmap; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; err_unmap: diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c index 26eed927f8b3..959acff8faff 100644 --- 
a/drivers/rtc/rtc-spear.c +++ b/drivers/rtc/rtc-spear.c @@ -395,7 +395,7 @@ static int spear_rtc_probe(struct platform_device *pdev) goto err_disable_clock; if (!device_can_wakeup(&pdev->dev)) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; @@ -411,7 +411,7 @@ static void spear_rtc_remove(struct platform_device *pdev) spear_rtc_disable_interrupt(config); clk_disable_unprepare(config->clk); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/rtc/rtc-stm32.c b/drivers/rtc/rtc-stm32.c index 9f1a019ec8af..a0564d443569 100644 --- a/drivers/rtc/rtc-stm32.c +++ b/drivers/rtc/rtc-stm32.c @@ -1074,26 +1074,18 @@ static int stm32_rtc_probe(struct platform_device *pdev) regs = &rtc->data->regs; if (rtc->data->need_dbp) { - rtc->dbp = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, - "st,syscfg"); + unsigned int args[2]; + + rtc->dbp = syscon_regmap_lookup_by_phandle_args(pdev->dev.of_node, + "st,syscfg", + 2, args); if (IS_ERR(rtc->dbp)) { dev_err(&pdev->dev, "no st,syscfg\n"); return PTR_ERR(rtc->dbp); } - ret = of_property_read_u32_index(pdev->dev.of_node, "st,syscfg", - 1, &rtc->dbp_reg); - if (ret) { - dev_err(&pdev->dev, "can't read DBP register offset\n"); - return ret; - } - - ret = of_property_read_u32_index(pdev->dev.of_node, "st,syscfg", - 2, &rtc->dbp_mask); - if (ret) { - dev_err(&pdev->dev, "can't read DBP register mask\n"); - return ret; - } + rtc->dbp_reg = args[0]; + rtc->dbp_mask = args[1]; } if (!rtc->data->has_pclk) { diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index e681c1745866..e5e6013d080e 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -826,7 +826,7 @@ static int sun6i_rtc_probe(struct platform_device *pdev) clk_prepare_enable(chip->losc); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); chip->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(chip->rtc)) diff --git a/drivers/rtc/rtc-sunplus.c b/drivers/rtc/rtc-sunplus.c index 9b1ce0e8ba27..519a06e728d6 100644 --- a/drivers/rtc/rtc-sunplus.c +++ b/drivers/rtc/rtc-sunplus.c @@ -269,7 +269,7 @@ static int sp_rtc_probe(struct platform_device *plat_dev) if (ret) goto free_reset_assert; - device_init_wakeup(&plat_dev->dev, 1); + device_init_wakeup(&plat_dev->dev, true); dev_set_drvdata(&plat_dev->dev, sp_rtc); sp_rtc->rtc = devm_rtc_allocate_device(&plat_dev->dev); @@ -307,7 +307,7 @@ static void sp_rtc_remove(struct platform_device *plat_dev) { struct sunplus_rtc *sp_rtc = dev_get_drvdata(&plat_dev->dev); - device_init_wakeup(&plat_dev->dev, 0); + device_init_wakeup(&plat_dev->dev, false); reset_control_assert(sp_rtc->rstc); clk_disable_unprepare(sp_rtc->rtcclk); } diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c index 79a3102c8354..46788db89953 100644 --- a/drivers/rtc/rtc-tegra.c +++ b/drivers/rtc/rtc-tegra.c @@ -319,7 +319,7 @@ static int tegra_rtc_probe(struct platform_device *pdev) writel(0xffffffff, info->base + TEGRA_RTC_REG_INTR_STATUS); writel(0, info->base + TEGRA_RTC_REG_INTR_MASK); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ret = devm_request_irq(&pdev->dev, info->irq, tegra_rtc_irq_handler, IRQF_TRIGGER_HIGH, dev_name(&pdev->dev), diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index 7e0d8fb26465..a68b8c884102 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -132,7 +132,7 @@ static int test_probe(struct platform_device *plat_dev) break; default: 
rtd->rtc->ops = &test_rtc_ops; - device_init_wakeup(&plat_dev->dev, 1); + device_init_wakeup(&plat_dev->dev, true); } timer_setup(&rtd->alarm, test_rtc_alarm_handler, 0); diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c index e796729fc817..54c8429b16bf 100644 --- a/drivers/rtc/rtc-tps6586x.c +++ b/drivers/rtc/rtc-tps6586x.c @@ -241,7 +241,7 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); platform_set_drvdata(pdev, rtc); rtc->rtc = devm_rtc_allocate_device(&pdev->dev); diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c index 2ea1bbfbbc2a..284aa2f0392b 100644 --- a/drivers/rtc/rtc-tps65910.c +++ b/drivers/rtc/rtc-tps65910.c @@ -418,7 +418,7 @@ static int tps65910_rtc_probe(struct platform_device *pdev) tps_rtc->irq = irq; if (irq != -1) { if (device_property_present(tps65910->dev, "wakeup-source")) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); else device_set_wakeup_capable(&pdev->dev, 1); } else { diff --git a/drivers/rtc/rtc-tps6594.c b/drivers/rtc/rtc-tps6594.c index e69667634137..7c6246e3f029 100644 --- a/drivers/rtc/rtc-tps6594.c +++ b/drivers/rtc/rtc-tps6594.c @@ -37,7 +37,7 @@ #define MAX_OFFSET (277774) // Number of ticks per hour -#define TICKS_PER_HOUR (32768 * 3600) +#define TICKS_PER_HOUR (32768 * 3600LL) // Multiplier for ppb conversions #define PPB_MULT NANO diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 794429182b34..e6106e67e1f4 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -572,7 +572,7 @@ static int twl_rtc_probe(struct platform_device *pdev) return ret; platform_set_drvdata(pdev, twl_rtc); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); twl_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &twl_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c index 640833e21057..218316be942a 100644 --- a/drivers/rtc/rtc-wm831x.c +++ b/drivers/rtc/rtc-wm831x.c @@ -420,7 +420,7 @@ static int wm831x_rtc_probe(struct platform_device *pdev) if (ret & WM831X_RTC_ALM_ENA) wm831x_rtc->alarm_enabled = 1; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); wm831x_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(wm831x_rtc->rtc)) diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c index 6797eb4d2e49..3bd60d067a5e 100644 --- a/drivers/rtc/rtc-wm8350.c +++ b/drivers/rtc/rtc-wm8350.c @@ -420,7 +420,7 @@ static int wm8350_rtc_probe(struct platform_device *pdev) } } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); wm_rtc->rtc = devm_rtc_device_register(&pdev->dev, "wm8350", &wm8350_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-xgene.c b/drivers/rtc/rtc-xgene.c index 0813ea1a03c2..6660b664e8dd 100644 --- a/drivers/rtc/rtc-xgene.c +++ b/drivers/rtc/rtc-xgene.c @@ -174,7 +174,7 @@ static int xgene_rtc_probe(struct platform_device *pdev) /* Turn on the clock and the crystal */ writel(RTC_CCR_EN, pdata->csr_base + RTC_CCR); - ret = device_init_wakeup(&pdev->dev, 1); + ret = device_init_wakeup(&pdev->dev, true); if (ret) { clk_disable_unprepare(pdata->clk); return ret; @@ -197,7 +197,7 @@ static void xgene_rtc_remove(struct platform_device *pdev) struct xgene_rtc_dev *pdata = platform_get_drvdata(pdev); xgene_rtc_alarm_irq_enable(&pdev->dev, 0); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); 
clk_disable_unprepare(pdata->clk); } diff --git a/drivers/rtc/rtc-zynqmp.c b/drivers/rtc/rtc-zynqmp.c index af1abb69d1e3..f39102b66eac 100644 --- a/drivers/rtc/rtc-zynqmp.c +++ b/drivers/rtc/rtc-zynqmp.c @@ -318,8 +318,8 @@ static int xlnx_rtc_probe(struct platform_device *pdev) return ret; } - /* Getting the rtc_clk info */ - xrtcdev->rtc_clk = devm_clk_get_optional(&pdev->dev, "rtc_clk"); + /* Getting the rtc info */ + xrtcdev->rtc_clk = devm_clk_get_optional(&pdev->dev, "rtc"); if (IS_ERR(xrtcdev->rtc_clk)) { if (PTR_ERR(xrtcdev->rtc_clk) != -EPROBE_DEFER) dev_warn(&pdev->dev, "Device clock not found.\n"); @@ -337,7 +337,7 @@ static int xlnx_rtc_probe(struct platform_device *pdev) xlnx_init_rtc(xrtcdev); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return devm_rtc_register_device(xrtcdev->rtc); } @@ -345,7 +345,7 @@ static int xlnx_rtc_probe(struct platform_device *pdev) static void xlnx_rtc_remove(struct platform_device *pdev) { xlnx_rtc_alarm_irq_enable(&pdev->dev, 0); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); } static int __maybe_unused xlnx_rtc_suspend(struct device *dev) diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index fbffd451031f..45bd001206a2 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -245,7 +245,6 @@ static void sclp_request_timeout(bool force_restart); static void sclp_process_queue(void); static void __sclp_make_read_req(void); static int sclp_init_mask(int calculate); -static int sclp_init(void); static void __sclp_queue_read_req(void) @@ -1251,8 +1250,7 @@ static struct platform_driver sclp_pdrv = { /* Initialize SCLP driver. Return zero if driver is operational, non-zero * otherwise. */ -static int -sclp_init(void) +int sclp_init(void) { unsigned long flags; int rc = 0; @@ -1305,13 +1303,7 @@ fail_unlock: static __init int sclp_initcall(void) { - int rc; - - rc = platform_driver_register(&sclp_pdrv); - if (rc) - return rc; - - return sclp_init(); + return platform_driver_register(&sclp_pdrv); } arch_initcall(sclp_initcall); diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 3dd50ac9c5b0..b2d93a6e36c4 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -123,7 +123,7 @@ static DECLARE_WAIT_QUEUE_HEAD(read_wait_queue); */ static struct vmlogrdr_priv_t sys_ser[] = { - { .system_service = "*LOGREC ", + { .system_service = { '*', 'L', 'O', 'G', 'R', 'E', 'C', ' ' }, .internal_name = "logrec", .recording_name = "EREP", .minor_num = 0, @@ -132,7 +132,7 @@ static struct vmlogrdr_priv_t sys_ser[] = { .autorecording = 1, .autopurge = 1, }, - { .system_service = "*ACCOUNT", + { .system_service = { '*', 'A', 'C', 'C', 'O', 'U', 'N', 'T' }, .internal_name = "account", .recording_name = "ACCOUNT", .minor_num = 1, @@ -141,7 +141,7 @@ static struct vmlogrdr_priv_t sys_ser[] = { .autorecording = 1, .autopurge = 1, }, - { .system_service = "*SYMPTOM", + { .system_service = { '*', 'S', 'Y', 'M', 'P', 'T', 'O', 'M' }, .internal_name = "symptom", .recording_name = "SYMPTOM", .minor_num = 2, @@ -356,7 +356,7 @@ static int vmlogrdr_open (struct inode *inode, struct file *filp) if (connect_rc) { pr_err("vmlogrdr: iucv connection to %s " "failed with rc %i \n", - logptr->system_service, connect_rc); + logptr->internal_name, connect_rc); goto out_path; } diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index cba2d048a96b..4a0b3f19bd8e 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -128,7 +128,7 
@@ static int s390_vary_chpid(struct chp_id chpid, int on) * Channel measurement related functions */ static ssize_t measurement_chars_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct channel_path *chp; @@ -142,11 +142,11 @@ static ssize_t measurement_chars_read(struct file *filp, struct kobject *kobj, return memory_read_from_buffer(buf, count, &off, &chp->cmg_chars, sizeof(chp->cmg_chars)); } -static BIN_ATTR_ADMIN_RO(measurement_chars, sizeof(struct cmg_chars)); +static const BIN_ATTR_ADMIN_RO(measurement_chars, sizeof(struct cmg_chars)); static ssize_t measurement_chars_full_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct channel_path *chp = to_channelpath(kobj_to_dev(kobj)); @@ -196,22 +196,22 @@ static ssize_t chp_measurement_copy_block(void *buf, loff_t off, size_t count, } static ssize_t measurement_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { return chp_measurement_copy_block(buf, off, count, kobj, false); } -static BIN_ATTR_ADMIN_RO(measurement, sizeof(struct cmg_entry)); +static const BIN_ATTR_ADMIN_RO(measurement, sizeof(struct cmg_entry)); static ssize_t ext_measurement_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { return chp_measurement_copy_block(buf, off, count, kobj, true); } -static BIN_ATTR_ADMIN_RO(ext_measurement, sizeof(struct cmg_ext_entry)); +static const BIN_ATTR_ADMIN_RO(ext_measurement, sizeof(struct cmg_ext_entry)); -static struct bin_attribute *measurement_attrs[] = { +static const struct bin_attribute *measurement_attrs[] = { &bin_attr_measurement_chars, &bin_attr_measurement_chars_full, &bin_attr_measurement, @@ -435,7 +435,7 @@ static ssize_t speed_bps_show(struct device *dev, static DEVICE_ATTR_RO(speed_bps); static ssize_t util_string_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct channel_path *chp = to_channelpath(kobj_to_dev(kobj)); @@ -448,10 +448,10 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj, return rc; } -static BIN_ATTR_RO(util_string, - sizeof(((struct channel_path_desc_fmt3 *)0)->util_str)); +static const BIN_ATTR_RO(util_string, + sizeof(((struct channel_path_desc_fmt3 *)0)->util_str)); -static struct bin_attribute *chp_bin_attrs[] = { +static const struct bin_attribute *const chp_bin_attrs[] = { &bin_attr_util_string, NULL, }; @@ -468,9 +468,9 @@ static struct attribute *chp_attrs[] = { &dev_attr_speed_bps.attr, NULL, }; -static struct attribute_group chp_attr_group = { +static const struct attribute_group chp_attr_group = { .attrs = chp_attrs, - .bin_attrs = chp_bin_attrs, + .bin_attrs_new = chp_bin_attrs, }; static const struct attribute_group *chp_attr_groups[] = { &chp_attr_group, diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 062ec5f24758..6b0e6b4cd8af 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -7189,7 +7189,8 @@ exit_new_nt_list: * 1: if flashnode entry is non-persistent * 0: if flashnode entry is persistent **/ -static int qla4xxx_sysfs_ddb_is_non_persistent(struct 
device *dev, void *data) +static int qla4xxx_sysfs_ddb_is_non_persistent(struct device *dev, + const void *data) { struct iscsi_bus_flash_session *fnode_sess; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e7ea1f04164a..d776f13cd160 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2736,6 +2736,7 @@ int scsi_device_quiesce(struct scsi_device *sdev) { struct request_queue *q = sdev->request_queue; + unsigned int memflags; int err; /* @@ -2750,7 +2751,7 @@ scsi_device_quiesce(struct scsi_device *sdev) blk_set_pm_only(q); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); /* * Ensure that the effect of blk_set_pm_only() will be visible * for percpu_ref_tryget() callers that occur after the queue @@ -2758,7 +2759,7 @@ scsi_device_quiesce(struct scsi_device *sdev) * was called. See also https://lwn.net/Articles/573497/. */ synchronize_rcu(); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); mutex_lock(&sdev->state_mutex); err = scsi_device_set_state(sdev, SDEV_QUIESCE); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index f2093982b3db..087fcbfc9aaa 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -220,6 +220,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, int new_shift = sbitmap_calculate_shift(depth); bool need_alloc = !sdev->budget_map.map; bool need_free = false; + unsigned int memflags; int ret; struct sbitmap sb_backup; @@ -240,7 +241,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, * and here disk isn't added yet, so freezing is pretty fast */ if (need_free) { - blk_mq_freeze_queue(sdev->request_queue); + memflags = blk_mq_freeze_queue(sdev->request_queue); sb_backup = sdev->budget_map; } ret = sbitmap_init_node(&sdev->budget_map, @@ -256,7 +257,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, else sbitmap_free(&sb_backup); ret = 0; - blk_mq_unfreeze_queue(sdev->request_queue); + blk_mq_unfreeze_queue(sdev->request_queue, memflags); } return ret; } diff --git a/drivers/scsi/scsi_sysctl.c b/drivers/scsi/scsi_sysctl.c index 093774d77534..be4aef0f4f99 100644 --- a/drivers/scsi/scsi_sysctl.c +++ b/drivers/scsi/scsi_sysctl.c @@ -12,7 +12,7 @@ #include "scsi_priv.h" -static struct ctl_table scsi_table[] = { +static const struct ctl_table scsi_table[] = { { .procname = "logging_level", .data = &scsi_logging_level, .maxlen = sizeof(scsi_logging_level), diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index fec8ffb8d653..9c347c64c315 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1324,7 +1324,7 @@ EXPORT_SYMBOL_GPL(iscsi_create_flashnode_conn); * 1 on success * 0 on failure */ -static int iscsi_is_flashnode_conn_dev(struct device *dev, void *data) +static int iscsi_is_flashnode_conn_dev(struct device *dev, const void *data) { return dev->bus == &iscsi_flashnode_bus; } @@ -1335,7 +1335,7 @@ static int iscsi_destroy_flashnode_conn(struct iscsi_bus_flash_conn *fnode_conn) return 0; } -static int flashnode_match_index(struct device *dev, void *data) +static int flashnode_match_index(struct device *dev, const void *data) { struct iscsi_bus_flash_session *fnode_sess = NULL; int ret = 0; @@ -1344,7 +1344,7 @@ static int flashnode_match_index(struct device *dev, void *data) goto exit_match_index; fnode_sess = iscsi_dev_to_flash_session(dev); - ret = (fnode_sess->target_id == *((int *)data)) ? 
1 : 0; + ret = (fnode_sess->target_id == *((const int *)data)) ? 1 : 0; exit_match_index: return ret; @@ -1389,8 +1389,8 @@ iscsi_get_flashnode_by_index(struct Scsi_Host *shost, uint32_t idx) * %NULL on failure */ struct device * -iscsi_find_flashnode_sess(struct Scsi_Host *shost, void *data, - int (*fn)(struct device *dev, void *data)) +iscsi_find_flashnode_sess(struct Scsi_Host *shost, const void *data, + device_match_t fn) { return device_find_child(&shost->shost_gendev, data, fn); } diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 94127868bedf..effb7e768165 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1639,7 +1639,7 @@ MODULE_PARM_DESC(allow_dio, "allow direct I/O (default: 0 (disallow))"); #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> -static struct ctl_table sg_sysctls[] = { +static const struct ctl_table sg_sysctls[] = { { .procname = "sg-big-buff", .data = &sg_big_buff, diff --git a/drivers/slimbus/core.c b/drivers/slimbus/core.c index 65e5515f7555..005fa2ef100f 100644 --- a/drivers/slimbus/core.c +++ b/drivers/slimbus/core.c @@ -328,7 +328,8 @@ void slim_report_absent(struct slim_device *sbdev) } EXPORT_SYMBOL_GPL(slim_report_absent); -static bool slim_eaddr_equal(struct slim_eaddr *a, struct slim_eaddr *b) +static bool slim_eaddr_equal(const struct slim_eaddr *a, + const struct slim_eaddr *b) { return (a->manf_id == b->manf_id && a->prod_code == b->prod_code && @@ -336,9 +337,9 @@ static bool slim_eaddr_equal(struct slim_eaddr *a, struct slim_eaddr *b) a->instance == b->instance); } -static int slim_match_dev(struct device *dev, void *data) +static int slim_match_dev(struct device *dev, const void *data) { - struct slim_eaddr *e_addr = data; + const struct slim_eaddr *e_addr = data; struct slim_device *sbdev = to_slim_device(dev); return slim_eaddr_equal(&sbdev->e_addr, e_addr); @@ -384,21 +385,13 @@ struct slim_device *slim_get_device(struct slim_controller *ctrl, } EXPORT_SYMBOL_GPL(slim_get_device); -static int of_slim_match_dev(struct device *dev, void *data) -{ - struct device_node *np = data; - struct slim_device *sbdev = to_slim_device(dev); - - return (sbdev->dev.of_node == np); -} - static struct slim_device *of_find_slim_device(struct slim_controller *ctrl, struct device_node *np) { struct slim_device *sbdev; struct device *dev; - dev = device_find_child(ctrl->dev, np, of_slim_match_dev); + dev = device_find_child(ctrl->dev, np, device_match_of_node); if (dev) { sbdev = to_slim_device(dev); return sbdev; diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 96a7f9709720..5a54b10daf77 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -384,7 +384,7 @@ static u32 amd_sdw_read_ping_status(struct sdw_bus *bus) return slave_stat; } -static int amd_sdw_compute_params(struct sdw_bus *bus) +static int amd_sdw_compute_params(struct sdw_bus *bus, struct sdw_stream_runtime *stream) { struct sdw_transport_data t_data = {0}; struct sdw_master_runtime *m_rt; @@ -410,7 +410,7 @@ static int amd_sdw_compute_params(struct sdw_bus *bus) sdw_fill_xport_params(&p_rt->transport_params, p_rt->num, false, SDW_BLK_GRP_CNT_1, sample_int, port_bo, port_bo >> 8, hstart, hstop, - SDW_BLK_PKG_PER_PORT, 0x0); + SDW_BLK_PKG_PER_PORT, p_rt->lane); sdw_fill_port_params(&p_rt->port_params, p_rt->num, bps, @@ -1190,6 +1190,7 @@ static int __maybe_unused amd_resume_runtime(struct device *dev) if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { return amd_sdw_clock_stop_exit(amd_manager); } else if 
(amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { + writel(0x00, amd_manager->acp_mmio + ACP_SW_WAKE_EN(amd_manager->instance)); val = readl(amd_manager->mmio + ACP_SW_CLK_RESUME_CTRL); if (val) { val |= AMD_SDW_CLK_RESUME_REQ; diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index d1dc62c34f1c..9b295fc9acd5 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -813,6 +813,16 @@ void sdw_extract_slave_id(struct sdw_bus *bus, } EXPORT_SYMBOL(sdw_extract_slave_id); +bool is_clock_scaling_supported_by_slave(struct sdw_slave *slave) +{ + /* + * Dynamic scaling is a defined by SDCA. However, some devices expose the class ID but + * can't support dynamic scaling. We might need a quirk to handle such devices. + */ + return slave->id.class_id; +} +EXPORT_SYMBOL(is_clock_scaling_supported_by_slave); + static int sdw_program_device_num(struct sdw_bus *bus, bool *programmed) { u8 buf[SDW_NUM_DEV_ID_REGISTERS] = {0}; @@ -1276,23 +1286,12 @@ int sdw_configure_dpn_intr(struct sdw_slave *slave, return ret; } -static int sdw_slave_set_frequency(struct sdw_slave *slave) +int sdw_slave_get_scale_index(struct sdw_slave *slave, u8 *base) { u32 mclk_freq = slave->bus->prop.mclk_freq; u32 curr_freq = slave->bus->params.curr_dr_freq >> 1; unsigned int scale; u8 scale_index; - u8 base; - int ret; - - /* - * frequency base and scale registers are required for SDCA - * devices. They may also be used for 1.2+/non-SDCA devices. - * Driver can set the property, we will need a DisCo property - * to discover this case from platform firmware. - */ - if (!slave->id.class_id && !slave->prop.clock_reg_supported) - return 0; if (!mclk_freq) { dev_err(&slave->dev, @@ -1311,19 +1310,19 @@ static int sdw_slave_set_frequency(struct sdw_slave *slave) */ if (!(19200000 % mclk_freq)) { mclk_freq = 19200000; - base = SDW_SCP_BASE_CLOCK_19200000_HZ; + *base = SDW_SCP_BASE_CLOCK_19200000_HZ; } else if (!(22579200 % mclk_freq)) { mclk_freq = 22579200; - base = SDW_SCP_BASE_CLOCK_22579200_HZ; + *base = SDW_SCP_BASE_CLOCK_22579200_HZ; } else if (!(24576000 % mclk_freq)) { mclk_freq = 24576000; - base = SDW_SCP_BASE_CLOCK_24576000_HZ; + *base = SDW_SCP_BASE_CLOCK_24576000_HZ; } else if (!(32000000 % mclk_freq)) { mclk_freq = 32000000; - base = SDW_SCP_BASE_CLOCK_32000000_HZ; + *base = SDW_SCP_BASE_CLOCK_32000000_HZ; } else if (!(96000000 % mclk_freq)) { mclk_freq = 24000000; - base = SDW_SCP_BASE_CLOCK_24000000_HZ; + *base = SDW_SCP_BASE_CLOCK_24000000_HZ; } else { dev_err(&slave->dev, "Unsupported clock base, mclk %d\n", @@ -1354,6 +1353,34 @@ static int sdw_slave_set_frequency(struct sdw_slave *slave) } scale_index++; + dev_dbg(&slave->dev, + "Configured bus base %d, scale %d, mclk %d, curr_freq %d\n", + *base, scale_index, mclk_freq, curr_freq); + + return scale_index; +} +EXPORT_SYMBOL(sdw_slave_get_scale_index); + +static int sdw_slave_set_frequency(struct sdw_slave *slave) +{ + int scale_index; + u8 base; + int ret; + + /* + * frequency base and scale registers are required for SDCA + * devices. They may also be used for 1.2+/non-SDCA devices. + * Driver can set the property directly, for now there's no + * DisCo property to discover support for the scaling registers + * from platform firmware. 
+ */ + if (!slave->id.class_id && !slave->prop.clock_reg_supported) + return 0; + + scale_index = sdw_slave_get_scale_index(slave, &base); + if (scale_index < 0) + return scale_index; + ret = sdw_write_no_pm(slave, SDW_SCP_BUS_CLOCK_BASE, base); if (ret < 0) { dev_err(&slave->dev, @@ -1373,10 +1400,6 @@ static int sdw_slave_set_frequency(struct sdw_slave *slave) dev_err(&slave->dev, "SDW_SCP_BUSCLOCK_SCALE_B1 write failed:%d\n", ret); - dev_dbg(&slave->dev, - "Configured bus base %d, scale %d, mclk %d, curr_freq %d\n", - base, scale_index, mclk_freq, curr_freq); - return ret; } diff --git a/drivers/soundwire/bus.h b/drivers/soundwire/bus.h index fda6b24ac2da..fc990171b3f7 100644 --- a/drivers/soundwire/bus.h +++ b/drivers/soundwire/bus.h @@ -90,6 +90,7 @@ int sdw_find_col_index(int col); * @transport_params: Transport parameters * @port_params: Port parameters * @port_node: List node for Master or Slave port_list + * @lane: Which lane is used * * SoundWire spec has no mention of ports for Master interface but the * concept is logically extended. @@ -100,6 +101,7 @@ struct sdw_port_runtime { struct sdw_transport_params transport_params; struct sdw_port_params port_params; struct list_head port_node; + unsigned int lane; }; /** @@ -149,6 +151,7 @@ struct sdw_transport_data { int hstop; int block_offset; int sub_block_offset; + unsigned int lane; }; struct sdw_dpn_prop *sdw_get_slave_dpn_prop(struct sdw_slave *slave, diff --git a/drivers/soundwire/bus_type.c b/drivers/soundwire/bus_type.c index 77dc094075e1..e98d5db81b1c 100644 --- a/drivers/soundwire/bus_type.c +++ b/drivers/soundwire/bus_type.c @@ -167,9 +167,6 @@ static int sdw_drv_remove(struct device *dev) slave->probed = false; - if (slave->prop.use_domain_irq) - sdw_irq_dispose_mapping(slave); - mutex_unlock(&slave->sdw_dev_lock); if (drv->remove) diff --git a/drivers/soundwire/generic_bandwidth_allocation.c b/drivers/soundwire/generic_bandwidth_allocation.c index b9316207c3ab..59965f43c2fb 100644 --- a/drivers/soundwire/generic_bandwidth_allocation.c +++ b/drivers/soundwire/generic_bandwidth_allocation.c @@ -18,6 +18,7 @@ struct sdw_group_params { unsigned int rate; + unsigned int lane; int full_bw; int payload_bw; int hwidth; @@ -27,6 +28,7 @@ struct sdw_group { unsigned int count; unsigned int max_size; unsigned int *rates; + unsigned int *lanes; }; void sdw_compute_slave_ports(struct sdw_master_runtime *m_rt, @@ -48,6 +50,9 @@ void sdw_compute_slave_ports(struct sdw_master_runtime *m_rt, slave_total_ch = 0; list_for_each_entry(p_rt, &s_rt->port_list, port_node) { + if (p_rt->lane != t_data->lane) + continue; + ch = hweight32(p_rt->ch_mask); sdw_fill_xport_params(&p_rt->transport_params, @@ -56,7 +61,7 @@ void sdw_compute_slave_ports(struct sdw_master_runtime *m_rt, sample_int, port_bo, port_bo >> 8, t_data->hstart, t_data->hstop, - SDW_BLK_PKG_PER_PORT, 0x0); + SDW_BLK_PKG_PER_PORT, p_rt->lane); sdw_fill_port_params(&p_rt->port_params, p_rt->num, bps, @@ -105,11 +110,13 @@ static void sdw_compute_master_ports(struct sdw_master_runtime *m_rt, t_data.hstart = hstart; list_for_each_entry(p_rt, &m_rt->port_list, port_node) { + if (p_rt->lane != params->lane) + continue; sdw_fill_xport_params(&p_rt->transport_params, p_rt->num, false, SDW_BLK_GRP_CNT_1, sample_int, *port_bo, (*port_bo) >> 8, hstart, hstop, - SDW_BLK_PKG_PER_PORT, 0x0); + SDW_BLK_PKG_PER_PORT, p_rt->lane); sdw_fill_port_params(&p_rt->port_params, p_rt->num, bps, @@ -131,6 +138,7 @@ static void sdw_compute_master_ports(struct sdw_master_runtime *m_rt, (*port_bo) += bps * 
ch; } + t_data.lane = params->lane; sdw_compute_slave_ports(m_rt, &t_data); } @@ -138,69 +146,107 @@ static void _sdw_compute_port_params(struct sdw_bus *bus, struct sdw_group_params *params, int count) { struct sdw_master_runtime *m_rt; - int hstop = bus->params.col - 1; - int port_bo, i; + int port_bo, i, l; + int hstop; /* Run loop for all groups to compute transport parameters */ - for (i = 0; i < count; i++) { - port_bo = 1; + for (l = 0; l < SDW_MAX_LANES; l++) { + if (l > 0 && !bus->lane_used_bandwidth[l]) + continue; + /* reset hstop for each lane */ + hstop = bus->params.col - 1; + for (i = 0; i < count; i++) { + if (params[i].lane != l) + continue; + port_bo = 1; - list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) { - sdw_compute_master_ports(m_rt, ¶ms[i], &port_bo, hstop); - } + list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) { + sdw_compute_master_ports(m_rt, ¶ms[i], &port_bo, hstop); + } - hstop = hstop - params[i].hwidth; + hstop = hstop - params[i].hwidth; + } } } static int sdw_compute_group_params(struct sdw_bus *bus, + struct sdw_stream_runtime *stream, struct sdw_group_params *params, - int *rates, int count) + struct sdw_group *group) { struct sdw_master_runtime *m_rt; + struct sdw_port_runtime *p_rt; int sel_col = bus->params.col; unsigned int rate, bps, ch; - int i, column_needed = 0; + int i, l, column_needed; /* Calculate bandwidth per group */ - for (i = 0; i < count; i++) { - params[i].rate = rates[i]; + for (i = 0; i < group->count; i++) { + params[i].rate = group->rates[i]; + params[i].lane = group->lanes[i]; params[i].full_bw = bus->params.curr_dr_freq / params[i].rate; } list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) { - rate = m_rt->stream->params.rate; - bps = m_rt->stream->params.bps; - ch = m_rt->ch_count; + if (m_rt->stream == stream) { + /* Only runtime during prepare should be added */ + if (stream->state != SDW_STREAM_CONFIGURED) + continue; + } else { + /* + * Include runtimes with running (ENABLED state) and paused (DISABLED state) + * streams + */ + if (m_rt->stream->state != SDW_STREAM_ENABLED && + m_rt->stream->state != SDW_STREAM_DISABLED) + continue; + } + list_for_each_entry(p_rt, &m_rt->port_list, port_node) { + rate = m_rt->stream->params.rate; + bps = m_rt->stream->params.bps; + ch = hweight32(p_rt->ch_mask); - for (i = 0; i < count; i++) { - if (rate == params[i].rate) - params[i].payload_bw += bps * ch; + for (i = 0; i < group->count; i++) { + if (rate == params[i].rate && p_rt->lane == params[i].lane) + params[i].payload_bw += bps * ch; + } } } - for (i = 0; i < count; i++) { - params[i].hwidth = (sel_col * - params[i].payload_bw + params[i].full_bw - 1) / - params[i].full_bw; + for (l = 0; l < SDW_MAX_LANES; l++) { + if (l > 0 && !bus->lane_used_bandwidth[l]) + continue; + /* reset column_needed for each lane */ + column_needed = 0; + for (i = 0; i < group->count; i++) { + if (params[i].lane != l) + continue; + + params[i].hwidth = (sel_col * params[i].payload_bw + + params[i].full_bw - 1) / params[i].full_bw; - column_needed += params[i].hwidth; + column_needed += params[i].hwidth; + /* There is no control column for lane 1 and above */ + if (column_needed > sel_col) + return -EINVAL; + /* Column 0 is control column on lane 0 */ + if (params[i].lane == 0 && column_needed > sel_col - 1) + return -EINVAL; + } } - if (column_needed > sel_col - 1) - return -EINVAL; return 0; } static int sdw_add_element_group_count(struct sdw_group *group, - unsigned int rate) + unsigned int rate, unsigned int lane) { int num = group->count; 
int i; for (i = 0; i <= num; i++) { - if (rate == group->rates[i]) + if (rate == group->rates[i] && lane == group->lanes[i]) break; if (i != num) @@ -208,6 +254,7 @@ static int sdw_add_element_group_count(struct sdw_group *group, if (group->count >= group->max_size) { unsigned int *rates; + unsigned int *lanes; group->max_size += 1; rates = krealloc(group->rates, @@ -215,10 +262,20 @@ static int sdw_add_element_group_count(struct sdw_group *group, GFP_KERNEL); if (!rates) return -ENOMEM; + group->rates = rates; + + lanes = krealloc(group->lanes, + (sizeof(int) * group->max_size), + GFP_KERNEL); + if (!lanes) + return -ENOMEM; + + group->lanes = lanes; } - group->rates[group->count++] = rate; + group->rates[group->count] = rate; + group->lanes[group->count++] = lane; } return 0; @@ -228,6 +285,7 @@ static int sdw_get_group_count(struct sdw_bus *bus, struct sdw_group *group) { struct sdw_master_runtime *m_rt; + struct sdw_port_runtime *p_rt; unsigned int rate; int ret = 0; @@ -237,17 +295,32 @@ static int sdw_get_group_count(struct sdw_bus *bus, if (!group->rates) return -ENOMEM; + group->lanes = kcalloc(group->max_size, sizeof(int), GFP_KERNEL); + if (!group->lanes) { + kfree(group->rates); + group->rates = NULL; + return -ENOMEM; + } + list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) { + if (m_rt->stream->state == SDW_STREAM_DEPREPARED) + continue; + rate = m_rt->stream->params.rate; if (m_rt == list_first_entry(&bus->m_rt_list, struct sdw_master_runtime, bus_node)) { group->rates[group->count++] = rate; - - } else { - ret = sdw_add_element_group_count(group, rate); + } + /* + * Different ports could use different lane, add group element + * even if m_rt is the first entry + */ + list_for_each_entry(p_rt, &m_rt->port_list, port_node) { + ret = sdw_add_element_group_count(group, rate, p_rt->lane); if (ret < 0) { kfree(group->rates); + kfree(group->lanes); return ret; } } @@ -260,8 +333,9 @@ static int sdw_get_group_count(struct sdw_bus *bus, * sdw_compute_port_params: Compute transport and port parameters * * @bus: SDW Bus instance + * @stream: Soundwire stream */ -static int sdw_compute_port_params(struct sdw_bus *bus) +static int sdw_compute_port_params(struct sdw_bus *bus, struct sdw_stream_runtime *stream) { struct sdw_group_params *params = NULL; struct sdw_group group; @@ -281,8 +355,7 @@ static int sdw_compute_port_params(struct sdw_bus *bus) } /* Compute transport parameters for grouped streams */ - ret = sdw_compute_group_params(bus, params, - &group.rates[0], group.count); + ret = sdw_compute_group_params(bus, stream, params, &group); if (ret < 0) goto free_params; @@ -292,6 +365,7 @@ free_params: kfree(params); out: kfree(group.rates); + kfree(group.lanes); return ret; } @@ -299,7 +373,6 @@ out: static int sdw_select_row_col(struct sdw_bus *bus, int clk_freq) { struct sdw_master_prop *prop = &bus->prop; - int frame_int, frame_freq; int r, c; for (c = 0; c < SDW_FRAME_COLS; c++) { @@ -308,11 +381,8 @@ static int sdw_select_row_col(struct sdw_bus *bus, int clk_freq) sdw_cols[c] != prop->default_col) continue; - frame_int = sdw_rows[r] * sdw_cols[c]; - frame_freq = clk_freq / frame_int; - - if ((clk_freq - (frame_freq * SDW_FRAME_CTRL_BITS)) < - bus->params.bandwidth) + if (clk_freq * (sdw_cols[c] - 1) < + bus->params.bandwidth * sdw_cols[c]) continue; bus->params.row = sdw_rows[r]; @@ -324,6 +394,95 @@ static int sdw_select_row_col(struct sdw_bus *bus, int clk_freq) return -EINVAL; } +static bool is_clock_scaling_supported(struct sdw_bus *bus) +{ + struct sdw_master_runtime 
*m_rt; + struct sdw_slave_runtime *s_rt; + + list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) + list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) + if (!is_clock_scaling_supported_by_slave(s_rt->slave)) + return false; + + return true; +} + +/** + * is_lane_connected_to_all_peripherals: Check if the given manager lane connects to all peripherals + * So that all peripherals can use the manager lane. + * + * @m_rt: Manager runtime + * @lane: Lane number + */ +static bool is_lane_connected_to_all_peripherals(struct sdw_master_runtime *m_rt, unsigned int lane) +{ + struct sdw_slave_prop *slave_prop; + struct sdw_slave_runtime *s_rt; + int i; + + list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) { + slave_prop = &s_rt->slave->prop; + for (i = 1; i < SDW_MAX_LANES; i++) { + if (slave_prop->lane_maps[i] == lane) { + dev_dbg(&s_rt->slave->dev, + "M lane %d is connected to P lane %d\n", + lane, i); + break; + } + } + if (i == SDW_MAX_LANES) { + dev_dbg(&s_rt->slave->dev, "M lane %d is not connected\n", lane); + return false; + } + } + return true; +} + +static int get_manager_lane(struct sdw_bus *bus, struct sdw_master_runtime *m_rt, + struct sdw_slave_runtime *s_rt, unsigned int curr_dr_freq) +{ + struct sdw_slave_prop *slave_prop = &s_rt->slave->prop; + struct sdw_port_runtime *m_p_rt; + unsigned int required_bandwidth; + int m_lane; + int l; + + for (l = 1; l < SDW_MAX_LANES; l++) { + if (!slave_prop->lane_maps[l]) + continue; + + required_bandwidth = 0; + list_for_each_entry(m_p_rt, &m_rt->port_list, port_node) { + required_bandwidth += m_rt->stream->params.rate * + hweight32(m_p_rt->ch_mask) * + m_rt->stream->params.bps; + } + if (required_bandwidth <= + curr_dr_freq - bus->lane_used_bandwidth[l]) { + /* Check if m_lane is connected to all Peripherals */ + if (!is_lane_connected_to_all_peripherals(m_rt, + slave_prop->lane_maps[l])) { + dev_dbg(bus->dev, + "Not all Peripherals are connected to M lane %d\n", + slave_prop->lane_maps[l]); + continue; + } + m_lane = slave_prop->lane_maps[l]; + dev_dbg(&s_rt->slave->dev, "M lane %d is used\n", m_lane); + bus->lane_used_bandwidth[l] += required_bandwidth; + /* + * Use non-zero manager lane, subtract the lane 0 + * bandwidth that is already calculated + */ + bus->params.bandwidth -= required_bandwidth; + return m_lane; + } + } + + /* No available multi lane found, only lane 0 can be used */ + return 0; +} + /** * sdw_compute_bus_params: Compute bus parameters * @@ -331,10 +490,16 @@ static int sdw_select_row_col(struct sdw_bus *bus, int clk_freq) */ static int sdw_compute_bus_params(struct sdw_bus *bus) { - unsigned int curr_dr_freq = 0; struct sdw_master_prop *mstr_prop = &bus->prop; - int i, clk_values, ret; + struct sdw_slave_prop *slave_prop; + struct sdw_port_runtime *m_p_rt; + struct sdw_port_runtime *s_p_rt; + struct sdw_master_runtime *m_rt; + struct sdw_slave_runtime *s_rt; + unsigned int curr_dr_freq = 0; + int i, l, clk_values, ret; bool is_gear = false; + int m_lane = 0; u32 *clk_buf; if (mstr_prop->num_clk_gears) { @@ -349,6 +514,10 @@ static int sdw_compute_bus_params(struct sdw_bus *bus) clk_buf = NULL; } + /* If dynamic scaling is not supported, don't try higher freq */ + if (!is_clock_scaling_supported(bus)) + clk_values = 1; + for (i = 0; i < clk_values; i++) { if (!clk_buf) curr_dr_freq = bus->params.max_dr_freq; @@ -357,10 +526,26 @@ static int sdw_compute_bus_params(struct sdw_bus *bus) (bus->params.max_dr_freq >> clk_buf[i]) : clk_buf[i] * SDW_DOUBLE_RATE_FACTOR; - if (curr_dr_freq <= bus->params.bandwidth) 
- continue; + if (curr_dr_freq * (mstr_prop->default_col - 1) >= + bus->params.bandwidth * mstr_prop->default_col) + break; - break; + list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) { + /* + * Get the first s_rt that will be used to find the available lane that + * can be used. No need to check all Peripherals because we can't use + * multi-lane if we can't find any available lane for the first Peripheral. + */ + s_rt = list_first_entry(&m_rt->slave_rt_list, + struct sdw_slave_runtime, m_rt_node); + + /* + * Find the available Manager lane that connected to the first Peripheral. + */ + m_lane = get_manager_lane(bus, m_rt, s_rt, curr_dr_freq); + if (m_lane > 0) + goto out; + } /* * TODO: Check all the Slave(s) port(s) audio modes and find @@ -374,6 +559,38 @@ static int sdw_compute_bus_params(struct sdw_bus *bus) __func__, bus->params.bandwidth); return -EINVAL; } +out: + /* multilane can be used */ + if (m_lane > 0) { + /* Set Peripheral lanes */ + list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) { + slave_prop = &s_rt->slave->prop; + for (l = 1; l < SDW_MAX_LANES; l++) { + if (slave_prop->lane_maps[l] == m_lane) { + list_for_each_entry(s_p_rt, &s_rt->port_list, port_node) { + s_p_rt->lane = l; + dev_dbg(&s_rt->slave->dev, + "Set P lane %d for port %d\n", + l, s_p_rt->num); + } + break; + } + } + } + /* + * Set Manager lanes. Configure the last m_rt in bus->m_rt_list only since + * we don't want to touch other m_rts that are already working. + */ + list_for_each_entry(m_p_rt, &m_rt->port_list, port_node) { + m_p_rt->lane = m_lane; + } + } + + if (!mstr_prop->default_frame_rate || !mstr_prop->default_row) + return -EINVAL; + + mstr_prop->default_col = curr_dr_freq / mstr_prop->default_frame_rate / + mstr_prop->default_row; ret = sdw_select_row_col(bus, curr_dr_freq); if (ret < 0) { @@ -390,8 +607,9 @@ static int sdw_compute_bus_params(struct sdw_bus *bus) * sdw_compute_params: Compute bus, transport and port parameters * * @bus: SDW Bus instance + * @stream: Soundwire stream */ -int sdw_compute_params(struct sdw_bus *bus) +int sdw_compute_params(struct sdw_bus *bus, struct sdw_stream_runtime *stream) { int ret; @@ -401,7 +619,7 @@ int sdw_compute_params(struct sdw_bus *bus) return ret; /* Compute transport and port params */ - ret = sdw_compute_port_params(bus); + ret = sdw_compute_port_params(bus, stream); if (ret < 0) { dev_err(bus->dev, "Compute transport params failed: %d\n", ret); return ret; diff --git a/drivers/soundwire/irq.c b/drivers/soundwire/irq.c index 0c08cebb1235..c237e6d0766b 100644 --- a/drivers/soundwire/irq.c +++ b/drivers/soundwire/irq.c @@ -46,14 +46,18 @@ void sdw_irq_delete(struct sdw_bus *bus) irq_domain_remove(bus->domain); } +static void sdw_irq_dispose_mapping(void *data) +{ + struct sdw_slave *slave = data; + + irq_dispose_mapping(irq_find_mapping(slave->bus->domain, slave->dev_num)); +} + void sdw_irq_create_mapping(struct sdw_slave *slave) { slave->irq = irq_create_mapping(slave->bus->domain, slave->dev_num); if (!slave->irq) dev_warn(&slave->dev, "Failed to map IRQ\n"); -} -void sdw_irq_dispose_mapping(struct sdw_slave *slave) -{ - irq_dispose_mapping(irq_find_mapping(slave->bus->domain, slave->dev_num)); + devm_add_action_or_reset(&slave->dev, sdw_irq_dispose_mapping, slave); } diff --git a/drivers/soundwire/irq.h b/drivers/soundwire/irq.h index 58a58046d92b..86e2318409da 100644 --- a/drivers/soundwire/irq.h +++ b/drivers/soundwire/irq.h @@ -16,7 +16,6 @@ int sdw_irq_create(struct sdw_bus *bus, struct fwnode_handle *fwnode); void 
sdw_irq_delete(struct sdw_bus *bus); void sdw_irq_create_mapping(struct sdw_slave *slave); -void sdw_irq_dispose_mapping(struct sdw_slave *slave); #else /* CONFIG_IRQ_DOMAIN */ @@ -34,10 +33,6 @@ static inline void sdw_irq_create_mapping(struct sdw_slave *slave) { } -static inline void sdw_irq_dispose_mapping(struct sdw_slave *slave) -{ -} - #endif /* CONFIG_IRQ_DOMAIN */ #endif /* __SDW_IRQ_H */ diff --git a/drivers/soundwire/mipi_disco.c b/drivers/soundwire/mipi_disco.c index 9d59f486edbe..65afb28ef8fa 100644 --- a/drivers/soundwire/mipi_disco.c +++ b/drivers/soundwire/mipi_disco.c @@ -366,6 +366,44 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, return 0; } +/* + * In MIPI DisCo spec for SoundWire, lane mapping for a slave device is done with + * mipi-sdw-lane-x-mapping properties, where x is 1..7, and the values for those + * properties are mipi-sdw-manager-lane-x or mipi-sdw-peripheral-link-y, where x + * is an integer between 1 to 7 if the lane is connected to a manager lane, y is a + * character between A to E if the lane is connected to another peripheral lane. + */ +int sdw_slave_read_lane_mapping(struct sdw_slave *slave) +{ + struct sdw_slave_prop *prop = &slave->prop; + struct device *dev = &slave->dev; + char prop_name[30]; + const char *prop_val; + size_t len; + int ret, i; + u8 lane; + + for (i = 0; i < SDW_MAX_LANES; i++) { + snprintf(prop_name, sizeof(prop_name), "mipi-sdw-lane-%d-mapping", i); + ret = device_property_read_string(dev, prop_name, &prop_val); + if (ret) + continue; + + len = strlen(prop_val); + if (len < 1) + return -EINVAL; + + /* The last character is enough to identify the connection */ + ret = kstrtou8(&prop_val[len - 1], 10, &lane); + if (ret) + return ret; + if (in_range(lane, 1, SDW_MAX_LANES - 1)) + prop->lane_maps[i] = lane; + } + return 0; +} +EXPORT_SYMBOL(sdw_slave_read_lane_mapping); + /** * sdw_slave_read_prop() - Read Slave properties * @slave: SDW Slave @@ -486,6 +524,6 @@ int sdw_slave_read_prop(struct sdw_slave *slave) sdw_slave_read_dpn(slave, prop->sink_dpn_prop, nval, prop->sink_ports, "sink"); - return 0; + return sdw_slave_read_lane_mapping(slave); } EXPORT_SYMBOL(sdw_slave_read_prop); diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index e00c5ac496a6..0f45e3404756 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -1072,7 +1072,7 @@ static const struct sdw_master_ops qcom_swrm_ops = { .pre_bank_switch = qcom_swrm_pre_bank_switch, }; -static int qcom_swrm_compute_params(struct sdw_bus *bus) +static int qcom_swrm_compute_params(struct sdw_bus *bus, struct sdw_stream_runtime *stream) { struct qcom_swrm_ctrl *ctrl = to_qcom_sdw(bus); struct sdw_master_runtime *m_rt; diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index 7aa4900dcf31..e9df503332bb 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -629,8 +629,44 @@ static int sdw_notify_config(struct sdw_master_runtime *m_rt) static int sdw_program_params(struct sdw_bus *bus, bool prepare) { struct sdw_master_runtime *m_rt; + struct sdw_slave *slave; int ret = 0; + u32 addr1; + + /* Check if all Peripherals comply with SDCA */ + list_for_each_entry(slave, &bus->slaves, node) { + if (!slave->dev_num_sticky) + continue; + if (!is_clock_scaling_supported_by_slave(slave)) { + dev_dbg(&slave->dev, "The Peripheral doesn't comply with SDCA\n"); + goto manager_runtime; + } + } + + if (bus->params.next_bank) + addr1 = SDW_SCP_BUSCLOCK_SCALE_B1; + else + addr1 = SDW_SCP_BUSCLOCK_SCALE_B0; + + /* Program 
SDW_SCP_BUSCLOCK_SCALE if all Peripherals comply with SDCA */ + list_for_each_entry(slave, &bus->slaves, node) { + int scale_index; + u8 base; + + if (!slave->dev_num_sticky) + continue; + scale_index = sdw_slave_get_scale_index(slave, &base); + if (scale_index < 0) + return scale_index; + + ret = sdw_write_no_pm(slave, addr1, scale_index); + if (ret < 0) { + dev_err(&slave->dev, "SDW_SCP_BUSCLOCK_SCALE register write failed\n"); + return ret; + } + } +manager_runtime: list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) { /* @@ -1383,7 +1419,7 @@ static int _sdw_prepare_stream(struct sdw_stream_runtime *stream, /* Compute params */ if (bus->compute_params) { - ret = bus->compute_params(bus); + ret = bus->compute_params(bus, stream); if (ret < 0) { dev_err(bus->dev, "Compute params failed: %d\n", ret); @@ -1642,9 +1678,19 @@ EXPORT_SYMBOL(sdw_disable_stream); static int _sdw_deprepare_stream(struct sdw_stream_runtime *stream) { struct sdw_master_runtime *m_rt; + struct sdw_port_runtime *p_rt; + unsigned int multi_lane_bandwidth; + unsigned int bandwidth; struct sdw_bus *bus; + int state = stream->state; int ret = 0; + /* + * first mark the state as DEPREPARED so that it is not taken into account + * for bit allocation + */ + stream->state = SDW_STREAM_DEPREPARED; + list_for_each_entry(m_rt, &stream->master_list, stream_node) { bus = m_rt->bus; /* De-prepare port(s) */ @@ -1652,19 +1698,34 @@ static int _sdw_deprepare_stream(struct sdw_stream_runtime *stream) if (ret < 0) { dev_err(bus->dev, "De-prepare port(s) failed: %d\n", ret); + stream->state = state; return ret; } + multi_lane_bandwidth = 0; + + list_for_each_entry(p_rt, &m_rt->port_list, port_node) { + if (!p_rt->lane) + continue; + + bandwidth = m_rt->stream->params.rate * hweight32(p_rt->ch_mask) * + m_rt->stream->params.bps; + multi_lane_bandwidth += bandwidth; + bus->lane_used_bandwidth[p_rt->lane] -= bandwidth; + if (!bus->lane_used_bandwidth[p_rt->lane]) + p_rt->lane = 0; + } /* TODO: Update this during Device-Device support */ - bus->params.bandwidth -= m_rt->stream->params.rate * - m_rt->ch_count * m_rt->stream->params.bps; + bandwidth = m_rt->stream->params.rate * m_rt->ch_count * m_rt->stream->params.bps; + bus->params.bandwidth -= bandwidth - multi_lane_bandwidth; /* Compute params */ if (bus->compute_params) { - ret = bus->compute_params(bus); + ret = bus->compute_params(bus, stream); if (ret < 0) { dev_err(bus->dev, "Compute params failed: %d\n", ret); + stream->state = state; return ret; } } @@ -1673,11 +1734,11 @@ static int _sdw_deprepare_stream(struct sdw_stream_runtime *stream) ret = sdw_program_params(bus, false); if (ret < 0) { dev_err(bus->dev, "%s: Program params failed: %d\n", __func__, ret); + stream->state = state; return ret; } } - stream->state = SDW_STREAM_DEPREPARED; return do_bank_switch(stream); } diff --git a/drivers/staging/greybus/camera.c b/drivers/staging/greybus/camera.c index ca71023df447..5d80ace41d8e 100644 --- a/drivers/staging/greybus/camera.c +++ b/drivers/staging/greybus/camera.c @@ -1128,18 +1128,7 @@ done: static int gb_camera_debugfs_open(struct inode *inode, struct file *file) { - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(gb_camera_debugfs_entries); ++i) { - const struct gb_camera_debugfs_entry *entry = - &gb_camera_debugfs_entries[i]; - - if (!strcmp(file->f_path.dentry->d_iname, entry->name)) { - file->private_data = (void *)entry; - break; - } - } - + file->private_data = (void *)debugfs_get_aux(file); return 0; } @@ -1175,8 +1164,8 @@ static int gb_camera_debugfs_init(struct 
gb_camera *gcam) gcam->debugfs.buffers[i].length = 0; - debugfs_create_file(entry->name, entry->mask, - gcam->debugfs.root, gcam, + debugfs_create_file_aux(entry->name, entry->mask, + gcam->debugfs.root, gcam, entry, &gb_camera_debugfs_ops); } diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index eeb64433ebbc..1f25529fe05d 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -472,7 +472,7 @@ struct tb_retimer_lookup { u8 index; }; -static int retimer_match(struct device *dev, void *data) +static int retimer_match(struct device *dev, const void *data) { const struct tb_retimer_lookup *lookup = data; struct tb_retimer *rt = tb_to_retimer(dev); diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index 11a50c86a1e4..b0630e6d9472 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -1026,7 +1026,7 @@ static int remove_missing_service(struct device *dev, void *data) return 0; } -static int find_service(struct device *dev, void *data) +static int find_service(struct device *dev, const void *data) { const struct tb_property *p = data; struct tb_service *svc; diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index c472594c3a9f..92f7e752f862 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2349,9 +2349,9 @@ struct uart_match { struct uart_driver *driver; }; -static int serial_match_port(struct device *dev, void *data) +static int serial_match_port(struct device *dev, const void *data) { - struct uart_match *match = data; + const struct uart_match *match = data; struct tty_driver *tty_drv = match->driver->tty_driver; dev_t devt = MKDEV(tty_drv->major, tty_drv->minor_start) + match->port->line; diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index 796e37a1d859..3438269a5440 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1439,6 +1439,7 @@ static ssize_t max_number_of_rtt_store(struct device *dev, struct ufs_hba *hba = dev_get_drvdata(dev); struct ufs_dev_info *dev_info = &hba->dev_info; struct scsi_device *sdev; + unsigned int memflags; unsigned int rtt; int ret; @@ -1458,14 +1459,16 @@ static ssize_t max_number_of_rtt_store(struct device *dev, ufshcd_rpm_get_sync(hba); + memflags = memalloc_noio_save(); shost_for_each_device(sdev, hba->host) - blk_mq_freeze_queue(sdev->request_queue); + blk_mq_freeze_queue_nomemsave(sdev->request_queue); ret = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, QUERY_ATTR_IDN_MAX_NUM_OF_RTT, 0, 0, &rtt); shost_for_each_device(sdev, hba->host) - blk_mq_unfreeze_queue(sdev->request_queue); + blk_mq_unfreeze_queue_nomemrestore(sdev->request_queue); + memalloc_noio_restore(memflags); ufshcd_rpm_put_sync(hba); diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index 4f0c1b96e208..1f5ef174abea 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -232,16 +232,7 @@ static struct xhci_file_map ring_files[] = { static int xhci_ring_open(struct inode *inode, struct file *file) { - int i; - struct xhci_file_map *f_map; - const char *file_name = file_dentry(file)->d_iname; - - for (i = 0; i < ARRAY_SIZE(ring_files); i++) { - f_map = &ring_files[i]; - - if (strcmp(f_map->name, file_name) == 0) - break; - } + const struct xhci_file_map *f_map = debugfs_get_aux(file); return single_open(file, f_map->show, inode->i_private); } @@ -318,16 +309,7 @@ static struct xhci_file_map context_files[] 
= { static int xhci_context_open(struct inode *inode, struct file *file) { - int i; - struct xhci_file_map *f_map; - const char *file_name = file_dentry(file)->d_iname; - - for (i = 0; i < ARRAY_SIZE(context_files); i++) { - f_map = &context_files[i]; - - if (strcmp(f_map->name, file_name) == 0) - break; - } + const struct xhci_file_map *f_map = debugfs_get_aux(file); return single_open(file, f_map->show, inode->i_private); } @@ -410,7 +392,8 @@ static void xhci_debugfs_create_files(struct xhci_hcd *xhci, int i; for (i = 0; i < nentries; i++) - debugfs_create_file(files[i].name, 0444, parent, data, fops); + debugfs_create_file_aux(files[i].name, 0444, parent, + data, &files[i], fops); } static struct dentry *xhci_debugfs_create_ring_dir(struct xhci_hcd *xhci, diff --git a/drivers/usb/mtu3/mtu3_debugfs.c b/drivers/usb/mtu3/mtu3_debugfs.c index 9bd74c505872..c003049bafbf 100644 --- a/drivers/usb/mtu3/mtu3_debugfs.c +++ b/drivers/usb/mtu3/mtu3_debugfs.c @@ -257,16 +257,7 @@ static const struct mtu3_file_map mtu3_ep_files[] = { static int mtu3_ep_open(struct inode *inode, struct file *file) { - const char *file_name = file_dentry(file)->d_iname; - const struct mtu3_file_map *f_map; - int i; - - for (i = 0; i < ARRAY_SIZE(mtu3_ep_files); i++) { - f_map = &mtu3_ep_files[i]; - - if (strcmp(f_map->name, file_name) == 0) - break; - } + const struct mtu3_file_map *f_map = debugfs_get_aux(file); return single_open(file, f_map->show, inode->i_private); } @@ -289,17 +280,8 @@ static const struct debugfs_reg32 mtu3_prb_regs[] = { static int mtu3_probe_show(struct seq_file *sf, void *unused) { - const char *file_name = file_dentry(sf->file)->d_iname; struct mtu3 *mtu = sf->private; - const struct debugfs_reg32 *regs; - int i; - - for (i = 0; i < ARRAY_SIZE(mtu3_prb_regs); i++) { - regs = &mtu3_prb_regs[i]; - - if (strcmp(regs->name, file_name) == 0) - break; - } + const struct debugfs_reg32 *regs = debugfs_get_aux(sf->file); seq_printf(sf, "0x%04x - 0x%08x\n", (u32)regs->offset, mtu3_readl(mtu->ippc_base, (u32)regs->offset)); @@ -315,13 +297,11 @@ static int mtu3_probe_open(struct inode *inode, struct file *file) static ssize_t mtu3_probe_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) { - const char *file_name = file_dentry(file)->d_iname; struct seq_file *sf = file->private_data; struct mtu3 *mtu = sf->private; - const struct debugfs_reg32 *regs; + const struct debugfs_reg32 *regs = debugfs_get_aux(file); char buf[32]; u32 val; - int i; if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count))) return -EFAULT; @@ -329,12 +309,6 @@ static ssize_t mtu3_probe_write(struct file *file, const char __user *ubuf, if (kstrtou32(buf, 0, &val)) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(mtu3_prb_regs); i++) { - regs = &mtu3_prb_regs[i]; - - if (strcmp(regs->name, file_name) == 0) - break; - } mtu3_writel(mtu->ippc_base, (u32)regs->offset, val); return count; @@ -359,8 +333,8 @@ static void mtu3_debugfs_create_prb_files(struct mtu3 *mtu) for (i = 0; i < ARRAY_SIZE(mtu3_prb_regs); i++) { regs = &mtu3_prb_regs[i]; - debugfs_create_file(regs->name, 0644, dir_prb, - mtu, &mtu3_probe_fops); + debugfs_create_file_aux(regs->name, 0644, dir_prb, + mtu, regs, &mtu3_probe_fops); } mtu3_debugfs_regset(mtu, mtu->ippc_base, mtu3_prb_regs, @@ -380,8 +354,8 @@ static void mtu3_debugfs_create_ep_dir(struct mtu3_ep *mep, for (i = 0; i < ARRAY_SIZE(mtu3_ep_files); i++) { files = &mtu3_ep_files[i]; - debugfs_create_file(files->name, 0444, dir_ep, - mep, &mtu3_ep_fops); + 
debugfs_create_file_aux(files->name, 0444, dir_ep, + mep, files, &mtu3_ep_fops); } } diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index a137e105cc55..9c76c3d0c6cf 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -230,10 +230,10 @@ static const char * const usb_modes[] = { /* ------------------------------------------------------------------------- */ /* Alternate Modes */ -static int altmode_match(struct device *dev, void *data) +static int altmode_match(struct device *dev, const void *data) { struct typec_altmode *adev = to_typec_altmode(dev); - struct typec_device_id *id = data; + const struct typec_device_id *id = data; if (!is_typec_altmode(dev)) return 0; @@ -1284,11 +1284,6 @@ const struct device_type typec_cable_dev_type = { .release = typec_cable_release, }; -static int cable_match(struct device *dev, void *data) -{ - return is_typec_cable(dev); -} - /** * typec_cable_get - Get a reference to the USB Type-C cable * @port: The USB Type-C Port the cable is connected to @@ -1300,7 +1295,8 @@ struct typec_cable *typec_cable_get(struct typec_port *port) { struct device *dev; - dev = device_find_child(&port->dev, NULL, cable_match); + dev = device_find_child(&port->dev, &typec_cable_dev_type, + device_match_type); if (!dev) return NULL; @@ -2030,16 +2026,12 @@ const struct device_type typec_port_dev_type = { /* --------------------------------------- */ /* Driver callbacks to report role updates */ -static int partner_match(struct device *dev, void *data) -{ - return is_typec_partner(dev); -} - static struct typec_partner *typec_get_partner(struct typec_port *port) { struct device *dev; - dev = device_find_child(&port->dev, NULL, partner_match); + dev = device_find_child(&port->dev, &typec_partner_dev_type, + device_match_type); if (!dev) return NULL; @@ -2172,7 +2164,9 @@ void typec_set_pwr_opmode(struct typec_port *port, sysfs_notify(&port->dev.kobj, NULL, "power_operation_mode"); kobject_uevent(&port->dev.kobj, KOBJ_CHANGE); - partner_dev = device_find_child(&port->dev, NULL, partner_match); + partner_dev = device_find_child(&port->dev, + &typec_partner_dev_type, + device_match_type); if (partner_dev) { struct typec_partner *partner = to_typec_partner(partner_dev); @@ -2336,7 +2330,9 @@ int typec_get_negotiated_svdm_version(struct typec_port *port) enum usb_pd_svdm_ver svdm_version; struct device *partner_dev; - partner_dev = device_find_child(&port->dev, NULL, partner_match); + partner_dev = device_find_child(&port->dev, + &typec_partner_dev_type, + device_match_type); if (!partner_dev) return -ENODEV; @@ -2363,7 +2359,8 @@ int typec_get_cable_svdm_version(struct typec_port *port) enum usb_pd_svdm_ver svdm_version; struct device *cable_dev; - cable_dev = device_find_child(&port->dev, NULL, cable_match); + cable_dev = device_find_child(&port->dev, &typec_cable_dev_type, + device_match_type); if (!cable_dev) return -ENODEV; diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index ed4737de4528..f2e686f8f1ef 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -76,7 +76,7 @@ int mdev_register_parent(struct mdev_parent *parent, struct device *dev, if (ret) return ret; - ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL); + ret = class_compat_create_link(mdev_bus_compat_class, dev); if (ret) dev_warn(dev, "Failed to create compatibility class link\n"); @@ -98,7 +98,7 @@ void mdev_unregister_parent(struct mdev_parent *parent) dev_info(parent->dev, "MDEV: Unregistering\n"); 
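The greybus camera, xhci and mtu3 hunks above all make the same conversion: instead of the ->open() handler scanning a static table and comparing entry names against file_dentry(file)->d_iname, the matching table entry is attached at creation time with debugfs_create_file_aux() and recovered with debugfs_get_aux(). A minimal sketch of the idiom, using a hypothetical "foo" driver (everything except the debugfs helpers is illustrative, not from the patch):

#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

struct foo;					/* hypothetical driver context */

static int foo_state_show(struct seq_file *sf, void *unused);
static int foo_stats_show(struct seq_file *sf, void *unused);

struct foo_file_map {
	const char *name;
	int (*show)(struct seq_file *, void *);
};

static const struct foo_file_map foo_files[] = {
	{ "state", foo_state_show },
	{ "stats", foo_stats_show },
};

static int foo_open(struct inode *inode, struct file *file)
{
	/* The table entry was stored as aux data when the file was created. */
	const struct foo_file_map *f_map = debugfs_get_aux(file);

	return single_open(file, f_map->show, inode->i_private);
}

static const struct file_operations foo_fops = {
	.owner		= THIS_MODULE,
	.open		= foo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static void foo_debugfs_create(struct foo *foo, struct dentry *parent)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(foo_files); i++)
		debugfs_create_file_aux(foo_files[i].name, 0444, parent,
					foo, &foo_files[i], &foo_fops);
}

With the aux pointer there is no name comparison left in the open path, so adding a new debugfs file is just another table row.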
down_write(&parent->unreg_sem); - class_compat_remove_link(mdev_bus_compat_class, parent->dev, NULL); + class_compat_remove_link(mdev_bus_compat_class, parent->dev); device_for_each_child(parent->dev, NULL, mdev_device_remove_cb); parent_remove_sysfs_files(parent); up_write(&parent->unreg_sem); diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c index a467085038f0..e5ac39c4cc6b 100644 --- a/drivers/vfio/pci/nvgrace-gpu/main.c +++ b/drivers/vfio/pci/nvgrace-gpu/main.c @@ -5,6 +5,8 @@ #include <linux/sizes.h> #include <linux/vfio_pci_core.h> +#include <linux/delay.h> +#include <linux/jiffies.h> /* * The device memory usable to the workloads running in the VM is cached @@ -17,12 +19,21 @@ #define RESMEM_REGION_INDEX VFIO_PCI_BAR2_REGION_INDEX #define USEMEM_REGION_INDEX VFIO_PCI_BAR4_REGION_INDEX -/* Memory size expected as non cached and reserved by the VM driver */ -#define RESMEM_SIZE SZ_1G - /* A hardwired and constant ABI value between the GPU FW and VFIO driver. */ #define MEMBLK_SIZE SZ_512M +#define DVSEC_BITMAP_OFFSET 0xA +#define MIG_SUPPORTED_WITH_CACHED_RESMEM BIT(0) + +#define GPU_CAP_DVSEC_REGISTER 3 + +#define C2C_LINK_BAR0_OFFSET 0x1498 +#define HBM_TRAINING_BAR0_OFFSET 0x200BC +#define STATUS_READY 0xFF + +#define POLL_QUANTUM_MS 1000 +#define POLL_TIMEOUT_MS (30 * 1000) + /* * The state of the two device memory region - resmem and usemem - is * saved as struct mem_region. @@ -46,6 +57,7 @@ struct nvgrace_gpu_pci_core_device { struct mem_region resmem; /* Lock to control device memory kernel mapping */ struct mutex remap_lock; + bool has_mig_hw_bug; }; static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev) @@ -66,7 +78,7 @@ nvgrace_gpu_memregion(int index, if (index == USEMEM_REGION_INDEX) return &nvdev->usemem; - if (index == RESMEM_REGION_INDEX) + if (nvdev->resmem.memlength && index == RESMEM_REGION_INDEX) return &nvdev->resmem; return NULL; @@ -751,40 +763,67 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev, u64 memphys, u64 memlength) { int ret = 0; + u64 resmem_size = 0; /* - * The VM GPU device driver needs a non-cacheable region to support - * the MIG feature. Since the device memory is mapped as NORMAL cached, - * carve out a region from the end with a different NORMAL_NC - * property (called as reserved memory and represented as resmem). This - * region then is exposed as a 64b BAR (region 2 and 3) to the VM, while - * exposing the rest (termed as usable memory and represented using usemem) - * as cacheable 64b BAR (region 4 and 5). + * On Grace Hopper systems, the VM GPU device driver needs a non-cacheable + * region to support the MIG feature owing to a hardware bug. Since the + * device memory is mapped as NORMAL cached, carve out a region from the end + * with a different NORMAL_NC property (called as reserved memory and + * represented as resmem). This region then is exposed as a 64b BAR + * (region 2 and 3) to the VM, while exposing the rest (termed as usable + * memory and represented using usemem) as cacheable 64b BAR (region 4 and 5). * * devmem (memlength) * |-------------------------------------------------| * | | * usemem.memphys resmem.memphys + * + * This hardware bug is fixed on the Grace Blackwell platforms and the + * presence of the bug can be determined through nvdev->has_mig_hw_bug. + * Thus on systems with the hardware fix, there is no need to partition + * the GPU device memory and the entire memory is usable and mapped as + * NORMAL cached (i.e. resmem size is 0). 
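As a back-of-the-envelope illustration of the partitioning described in the comment above (the real logic lives in nvgrace_gpu_init_nvdev_struct(), continued just below): when the MIG hardware bug is present, a fixed 1 GiB is carved off the end of the device memory as resmem, the remaining usemem is kept aligned to the 512 MiB memblock ABI size, and each region's BAR is sized to the next power of two. A simplified, self-contained sketch with illustrative names; the struct layout and the exact resmem start/length calculation are assumptions, not taken verbatim from the patch:

#include <linux/errno.h>
#include <linux/log2.h>
#include <linux/math.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/types.h>

#define MEMBLK_SIZE	SZ_512M		/* hardwired GPU FW / VFIO ABI value */

struct mem_layout {			/* illustrative only */
	u64 usemem_start, usemem_len, usemem_bar;
	u64 resmem_start, resmem_len, resmem_bar;
};

static int split_device_memory(u64 memphys, u64 memlength,
			       bool has_mig_hw_bug, struct mem_layout *out)
{
	u64 resmem_size = has_mig_hw_bug ? SZ_1G : 0;

	*out = (struct mem_layout){ .usemem_start = memphys };

	if (check_sub_overflow(memlength, resmem_size, &out->usemem_len))
		return -EOVERFLOW;

	/* usemem is exposed to the VM as a power-of-two sized 64b BAR. */
	out->usemem_bar = roundup_pow_of_two(out->usemem_len);

	if (!has_mig_hw_bug)
		return 0;	/* no carve-out: all memory is cacheable usemem */

	/* The cacheable part must stay memblock (512M) aligned. */
	out->usemem_len	  = round_down(out->usemem_len, MEMBLK_SIZE);
	out->resmem_start = memphys + out->usemem_len;
	out->resmem_len	  = memlength - out->usemem_len;
	out->resmem_bar	  = roundup_pow_of_two(out->resmem_len);
	return 0;
}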
*/ + if (nvdev->has_mig_hw_bug) + resmem_size = SZ_1G; + nvdev->usemem.memphys = memphys; /* * The device memory exposed to the VM is added to the kernel by the - * VM driver module in chunks of memory block size. Only the usable - * memory (usemem) is added to the kernel for usage by the VM - * workloads. Make the usable memory size memblock aligned. + * VM driver module in chunks of memory block size. Note that only the + * usable memory (usemem) is added to the kernel for usage by the VM + * workloads. */ - if (check_sub_overflow(memlength, RESMEM_SIZE, + if (check_sub_overflow(memlength, resmem_size, &nvdev->usemem.memlength)) { ret = -EOVERFLOW; goto done; } /* - * The USEMEM part of the device memory has to be MEMBLK_SIZE - * aligned. This is a hardwired ABI value between the GPU FW and - * VFIO driver. The VM device driver is also aware of it and make - * use of the value for its calculation to determine USEMEM size. + * The usemem region is exposed as a 64B Bar composed of region 4 and 5. + * Calculate and save the BAR size for the region. + */ + nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength); + + /* + * If the hardware has the fix for MIG, there is no requirement + * for splitting the device memory to create RESMEM. The entire + * device memory is usable and will be USEMEM. Return here for + * such case. + */ + if (!nvdev->has_mig_hw_bug) + goto done; + + /* + * When the device memory is split to workaround the MIG bug on + * Grace Hopper, the USEMEM part of the device memory has to be + * MEMBLK_SIZE aligned. This is a hardwired ABI value between the + * GPU FW and VFIO driver. The VM device driver is also aware of it + * and make use of the value for its calculation to determine USEMEM + * size. Note that the device memory may not be 512M aligned. */ nvdev->usemem.memlength = round_down(nvdev->usemem.memlength, MEMBLK_SIZE); @@ -803,15 +842,93 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev, } /* - * The memory regions are exposed as BARs. Calculate and save - * the BAR size for them. + * The resmem region is exposed as a 64b BAR composed of region 2 and 3 + * for Grace Hopper. Calculate and save the BAR size for the region. */ - nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength); nvdev->resmem.bar_size = roundup_pow_of_two(nvdev->resmem.memlength); done: return ret; } +static bool nvgrace_gpu_has_mig_hw_bug(struct pci_dev *pdev) +{ + int pcie_dvsec; + u16 dvsec_ctrl16; + + pcie_dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_NVIDIA, + GPU_CAP_DVSEC_REGISTER); + + if (pcie_dvsec) { + pci_read_config_word(pdev, + pcie_dvsec + DVSEC_BITMAP_OFFSET, + &dvsec_ctrl16); + + if (dvsec_ctrl16 & MIG_SUPPORTED_WITH_CACHED_RESMEM) + return false; + } + + return true; +} + +/* + * To reduce the system bootup time, the HBM training has + * been moved out of the UEFI on the Grace-Blackwell systems. + * + * The onus of checking whether the HBM training has completed + * thus falls on the module. The HBM training status can be + * determined from a BAR0 register. + * + * Similarly, another BAR0 register exposes the status of the + * CPU-GPU chip-to-chip (C2C) cache coherent interconnect. + * + * Poll these register and check for 30s. If the HBM training is + * not complete or if the C2C link is not ready, fail the probe. + * + * While the wait is not required on Grace Hopper systems, it + * is beneficial to make the check to ensure the device is in an + * expected state. 
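The bug detection just above keys off a vendor-specific DVSEC capability: if the NVIDIA DVSEC with ID 3 is present and its bitmap register advertises MIG support with cached resmem, the carve-out is not needed. The same probe, reduced to a standalone helper for reference (the offsets and bit come from the patch, the function name is illustrative, and the logic deliberately mirrors nvgrace_gpu_has_mig_hw_bug() above):

#include <linux/bits.h>
#include <linux/pci.h>

#define GPU_CAP_DVSEC_REGISTER			3
#define DVSEC_BITMAP_OFFSET			0xA
#define MIG_SUPPORTED_WITH_CACHED_RESMEM	BIT(0)

/* true when the uncacheable resmem carve-out is still required */
static bool gpu_needs_uncached_resmem(struct pci_dev *pdev)
{
	u16 dvsec_ctrl16;
	u16 pos;

	pos = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_NVIDIA,
					GPU_CAP_DVSEC_REGISTER);
	if (!pos)
		return true;	/* capability absent: assume the old behaviour */

	pci_read_config_word(pdev, pos + DVSEC_BITMAP_OFFSET, &dvsec_ctrl16);

	return !(dvsec_ctrl16 & MIG_SUPPORTED_WITH_CACHED_RESMEM);
}

In the driver this result feeds nvdev->has_mig_hw_bug, which later decides whether any resmem region is carved out at all.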
+ * + * Ensure that the BAR0 region is enabled before accessing the + * registers. + */ +static int nvgrace_gpu_wait_device_ready(struct pci_dev *pdev) +{ + unsigned long timeout = jiffies + msecs_to_jiffies(POLL_TIMEOUT_MS); + void __iomem *io; + int ret = -ETIME; + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + ret = pci_request_selected_regions(pdev, 1 << 0, KBUILD_MODNAME); + if (ret) + goto request_region_exit; + + io = pci_iomap(pdev, 0, 0); + if (!io) { + ret = -ENOMEM; + goto iomap_exit; + } + + do { + if ((ioread32(io + C2C_LINK_BAR0_OFFSET) == STATUS_READY) && + (ioread32(io + HBM_TRAINING_BAR0_OFFSET) == STATUS_READY)) { + ret = 0; + goto reg_check_exit; + } + msleep(POLL_QUANTUM_MS); + } while (!time_after(jiffies, timeout)); + +reg_check_exit: + pci_iounmap(pdev, io); +iomap_exit: + pci_release_selected_regions(pdev, 1 << 0); +request_region_exit: + pci_disable_device(pdev); + return ret; +} + static int nvgrace_gpu_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -820,6 +937,10 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev, u64 memphys, memlength; int ret; + ret = nvgrace_gpu_wait_device_ready(pdev); + if (ret) + return ret; + ret = nvgrace_gpu_fetch_memory_property(pdev, &memphys, &memlength); if (!ret) ops = &nvgrace_gpu_pci_ops; @@ -832,6 +953,8 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev, dev_set_drvdata(&pdev->dev, &nvdev->core_device); if (ops == &nvgrace_gpu_pci_ops) { + nvdev->has_mig_hw_bug = nvgrace_gpu_has_mig_hw_bug(pdev); + /* * Device memory properties are identified in the host ACPI * table. Set the nvgrace_gpu_pci_core_device structure. @@ -868,6 +991,8 @@ static const struct pci_device_id nvgrace_gpu_vfio_pci_table[] = { { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2345) }, /* GH200 SKU */ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2348) }, + /* GB200 SKU */ + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2941) }, {} }; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 5572fd99b921..94142581c98c 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -511,13 +511,13 @@ static void vfio_bar_fixup(struct vfio_pci_core_device *vdev) mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1); mask |= PCI_ROM_ADDRESS_ENABLE; *vbar &= cpu_to_le32((u32)mask); - } else if (pdev->resource[PCI_ROM_RESOURCE].flags & - IORESOURCE_ROM_SHADOW) { - mask = ~(0x20000 - 1); + } else if (pdev->rom && pdev->romlen) { + mask = ~(roundup_pow_of_two(pdev->romlen) - 1); mask |= PCI_ROM_ADDRESS_ENABLE; *vbar &= cpu_to_le32((u32)mask); - } else + } else { *vbar = 0; + } vdev->bardirty = false; } diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 1a4ed5a357d3..586e49efb81b 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1054,31 +1054,27 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.flags = 0; + info.size = 0; - /* Report the BAR size, not the ROM size */ - info.size = pci_resource_len(pdev, info.index); - if (!info.size) { - /* Shadow ROMs appear as PCI option ROMs */ - if (pdev->resource[PCI_ROM_RESOURCE].flags & - IORESOURCE_ROM_SHADOW) - info.size = 0x20000; - else - break; - } - - /* - * Is it really there? Enable memory decode for implicit access - * in pci_map_rom(). 
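The vfio_bar_fixup() hunk above replaces the hard-wired 128 KiB (0x20000) shadow-ROM size with one derived from pdev->romlen. The read-only mask follows ordinary PCI BAR sizing rules: the size is rounded up to a power of two, the address bits below it are forced to zero, and the ROM enable bit is preserved. A tiny illustration of the computation (a standalone helper, not the driver's code):

#include <linux/log2.h>
#include <linux/pci.h>

/*
 * Mask applied to the guest-visible ROM BAR value for a shadow ROM of
 * 'romlen' bytes: address bits at and above the rounded-up size survive,
 * everything below reads back as zero, and the enable bit is preserved.
 */
static u32 shadow_rom_bar_mask(u64 romlen)
{
	u64 mask = ~(roundup_pow_of_two(romlen) - 1);

	mask |= PCI_ROM_ADDRESS_ENABLE;
	return (u32)mask;
}

For example, a 64 KiB shadow ROM yields a mask of 0xFFFF0001, which is what a real 64 KiB ROM BAR reports during sizing.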
- */ - cmd = vfio_pci_memory_lock_and_enable(vdev); - io = pci_map_rom(pdev, &size); - if (io) { + if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) { + /* + * Check ROM content is valid. Need to enable memory + * decode for ROM access in pci_map_rom(). + */ + cmd = vfio_pci_memory_lock_and_enable(vdev); + io = pci_map_rom(pdev, &size); + if (io) { + info.flags = VFIO_REGION_INFO_FLAG_READ; + /* Report the BAR size, not the ROM size. */ + info.size = pci_resource_len(pdev, PCI_ROM_RESOURCE); + pci_unmap_rom(pdev, io); + } + vfio_pci_memory_unlock_and_restore(vdev, cmd); + } else if (pdev->rom && pdev->romlen) { info.flags = VFIO_REGION_INFO_FLAG_READ; - pci_unmap_rom(pdev, io); - } else { - info.size = 0; + /* Report BAR size as power of two. */ + info.size = roundup_pow_of_two(pdev->romlen); } - vfio_pci_memory_unlock_and_restore(vdev, cmd); break; } diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 66b72c289284..6192788c8ba3 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -16,6 +16,7 @@ #include <linux/io.h> #include <linux/vfio.h> #include <linux/vgaarb.h> +#include <linux/io-64-nonatomic-lo-hi.h> #include "vfio_pci_priv.h" @@ -61,9 +62,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size); VFIO_IOWRITE(8) VFIO_IOWRITE(16) VFIO_IOWRITE(32) -#ifdef iowrite64 VFIO_IOWRITE(64) -#endif #define VFIO_IOREAD(size) \ int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \ @@ -89,9 +88,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size); VFIO_IOREAD(8) VFIO_IOREAD(16) VFIO_IOREAD(32) -#ifdef ioread64 VFIO_IOREAD(64) -#endif #define VFIO_IORDWR(size) \ static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,\ @@ -127,9 +124,7 @@ static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,\ VFIO_IORDWR(8) VFIO_IORDWR(16) VFIO_IORDWR(32) -#if defined(ioread64) && defined(iowrite64) VFIO_IORDWR(64) -#endif /* * Read or write from an __iomem region (MMIO or I/O port) with an excluded @@ -155,7 +150,6 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, else fillable = 0; -#if defined(ioread64) && defined(iowrite64) if (fillable >= 8 && !(off % 8)) { ret = vfio_pci_iordwr64(vdev, iswrite, test_mem, io, buf, off, &filled); @@ -163,7 +157,6 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, return ret; } else -#endif if (fillable >= 4 && !(off % 4)) { ret = vfio_pci_iordwr32(vdev, iswrite, test_mem, io, buf, off, &filled); @@ -244,9 +237,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, if (pci_resource_start(pdev, bar)) end = pci_resource_len(pdev, bar); - else if (bar == PCI_ROM_RESOURCE && - pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW) - end = 0x20000; + else if (bar == PCI_ROM_RESOURCE && pdev->rom && pdev->romlen) + end = roundup_pow_of_two(pdev->romlen); else return -EINVAL; @@ -261,11 +253,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, * excluded range at the end of the actual ROM. This makes * filling large ROM BARs much faster. 
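The vfio_pci_rdwr.c hunks can drop the #ifdef iowrite64 / ioread64 guards because <linux/io-64-nonatomic-lo-hi.h> guarantees the 64-bit accessors exist everywhere: on platforms without native 64-bit MMIO they are emulated as two 32-bit accesses, low word first. A minimal illustration of relying on that header (the device and register offset are hypothetical):

#include <linux/io.h>
#include <linux/io-64-nonatomic-lo-hi.h>

/*
 * Write a 64-bit doorbell value. With the lo-hi header included this
 * compiles and works on 32-bit platforms too; there the write is issued
 * as two 32-bit stores, lower half first.
 */
static void example_ring_doorbell(void __iomem *base, u64 val)
{
	/* 0x40 is an arbitrary, hypothetical register offset. */
	iowrite64(val, base + 0x40);
}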
*/ - io = pci_map_rom(pdev, &x_start); - if (!io) { - done = -ENOMEM; - goto out; + if (pci_resource_start(pdev, bar)) { + io = pci_map_rom(pdev, &x_start); + } else { + io = ioremap(pdev->rom, pdev->romlen); + x_start = pdev->romlen; } + if (!io) + return -ENOMEM; x_end = end; } else { int ret = vfio_pci_core_setup_barmap(vdev, bar); @@ -288,8 +283,13 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, if (done >= 0) *ppos += done; - if (bar == PCI_ROM_RESOURCE) - pci_unmap_rom(pdev, io); + if (bar == PCI_ROM_RESOURCE) { + if (pci_resource_start(pdev, bar)) + pci_unmap_rom(pdev, io); + else + iounmap(io); + } + out: return done; } @@ -381,12 +381,10 @@ static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd, vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem, ioeventfd->data, ioeventfd->addr); break; -#ifdef iowrite64 case 8: vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem, ioeventfd->data, ioeventfd->addr); break; -#endif } } @@ -440,10 +438,8 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, pos >= vdev->msix_offset + vdev->msix_size)) return -EINVAL; -#ifndef iowrite64 if (count == 8) return -EINVAL; -#endif ret = vfio_pci_core_setup_barmap(vdev, bar); if (ret) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index e53757d1d095..3bf1043cd795 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -388,6 +388,11 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region *reg, { unsigned int done = 0; + if (off >= reg->size) + return -EINVAL; + + count = min_t(size_t, count, reg->size - off); + if (!reg->ioaddr) { reg->ioaddr = ioremap(reg->addr, reg->size); @@ -467,6 +472,11 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region *reg, { unsigned int done = 0; + if (off >= reg->size) + return -EINVAL; + + count = min_t(size_t, count, reg->size - off); + if (!reg->ioaddr) { reg->ioaddr = ioremap(reg->addr, reg->size); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 528395133b4f..163f7f1d70f1 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -84,7 +84,7 @@ module_param(balloon_boot_timeout, uint, 0444); #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG static int xen_hotplug_unpopulated; -static struct ctl_table balloon_table[] = { +static const struct ctl_table balloon_table[] = { { .procname = "hotplug_unpopulated", .data = &xen_hotplug_unpopulated, diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index c63f317e3df3..093ad4a08672 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -105,7 +105,7 @@ static ssize_t online_show(struct device *dev, return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE)); } -static ssize_t __ref online_store(struct device *dev, +static ssize_t online_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index b72ee9379d77..4926d4badc57 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -341,6 +341,7 @@ int pvcalls_front_socket(struct socket *sock) pvcalls_exit(); return ret; } +EXPORT_SYMBOL_GPL(pvcalls_front_socket); static void free_active_ring(struct sock_mapping *map) { @@ -486,6 +487,7 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr, pvcalls_exit_sock(sock); return ret; } +EXPORT_SYMBOL_GPL(pvcalls_front_connect); static int 
__write_ring(struct pvcalls_data_intf *intf, struct pvcalls_data *data, @@ -581,6 +583,7 @@ again: pvcalls_exit_sock(sock); return tot_sent; } +EXPORT_SYMBOL_GPL(pvcalls_front_sendmsg); static int __read_ring(struct pvcalls_data_intf *intf, struct pvcalls_data *data, @@ -666,6 +669,7 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, pvcalls_exit_sock(sock); return ret; } +EXPORT_SYMBOL_GPL(pvcalls_front_recvmsg); int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { @@ -719,6 +723,7 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len) pvcalls_exit_sock(sock); return 0; } +EXPORT_SYMBOL_GPL(pvcalls_front_bind); int pvcalls_front_listen(struct socket *sock, int backlog) { @@ -768,8 +773,10 @@ int pvcalls_front_listen(struct socket *sock, int backlog) pvcalls_exit_sock(sock); return ret; } +EXPORT_SYMBOL_GPL(pvcalls_front_listen); -int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) +int pvcalls_front_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct pvcalls_bedata *bedata; struct sock_mapping *map; @@ -788,7 +795,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) return -EINVAL; } - nonblock = flags & SOCK_NONBLOCK; + nonblock = arg->flags & SOCK_NONBLOCK; /* * Backend only supports 1 inflight accept request, will return * errors for the others @@ -904,6 +911,7 @@ received: pvcalls_exit_sock(sock); return ret; } +EXPORT_SYMBOL_GPL(pvcalls_front_accept); static __poll_t pvcalls_front_poll_passive(struct file *file, struct pvcalls_bedata *bedata, @@ -1004,6 +1012,7 @@ __poll_t pvcalls_front_poll(struct file *file, struct socket *sock, pvcalls_exit_sock(sock); return ret; } +EXPORT_SYMBOL_GPL(pvcalls_front_poll); int pvcalls_front_release(struct socket *sock) { @@ -1087,6 +1096,7 @@ int pvcalls_front_release(struct socket *sock) pvcalls_exit(); return 0; } +EXPORT_SYMBOL_GPL(pvcalls_front_release); static const struct xenbus_device_id pvcalls_front_ids[] = { { "pvcalls" }, diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h index f694ad77379f..881ef14660bc 100644 --- a/drivers/xen/pvcalls-front.h +++ b/drivers/xen/pvcalls-front.h @@ -12,7 +12,7 @@ int pvcalls_front_bind(struct socket *sock, int pvcalls_front_listen(struct socket *sock, int backlog); int pvcalls_front_accept(struct socket *sock, struct socket *newsock, - int flags); + struct proto_accept_arg *arg); int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg, size_t len); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 698c43dd5dc8..f28bc763847a 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -202,7 +202,7 @@ static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) return inode->i_sb->s_fs_info; } -static inline struct v9fs_session_info *v9fs_dentry2v9ses(struct dentry *dentry) +static inline struct v9fs_session_info *v9fs_dentry2v9ses(const struct dentry *dentry) { return dentry->d_sb->s_fs_info; } diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 01338d4c2d9e..5061f192eafd 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -61,7 +61,7 @@ static void v9fs_dentry_release(struct dentry *dentry) p9_fid_put(hlist_entry(p, struct p9_fid, dlist)); } -static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) +static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { struct p9_fid *fid; struct inode *inode; @@ -99,14 +99,36 @@ out_valid: 
return 1; } +static int v9fs_lookup_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) +{ + return __v9fs_lookup_revalidate(dentry, flags); +} + +static bool v9fs_dentry_unalias_trylock(const struct dentry *dentry) +{ + struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry); + return down_write_trylock(&v9ses->rename_sem); +} + +static void v9fs_dentry_unalias_unlock(const struct dentry *dentry) +{ + struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry); + up_write(&v9ses->rename_sem); +} + const struct dentry_operations v9fs_cached_dentry_operations = { .d_revalidate = v9fs_lookup_revalidate, - .d_weak_revalidate = v9fs_lookup_revalidate, + .d_weak_revalidate = __v9fs_lookup_revalidate, .d_delete = v9fs_cached_dentry_delete, .d_release = v9fs_dentry_release, + .d_unalias_trylock = v9fs_dentry_unalias_trylock, + .d_unalias_unlock = v9fs_dentry_unalias_unlock, }; const struct dentry_operations v9fs_dentry_operations = { .d_delete = always_delete_dentry, .d_release = v9fs_dentry_release, + .d_unalias_trylock = v9fs_dentry_unalias_trylock, + .d_unalias_unlock = v9fs_dentry_unalias_unlock, }; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index a843c36fc471..02cbf38e1a77 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -23,7 +23,8 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, struct dir_context *ctx); -static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); +static int afs_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags); static int afs_d_delete(const struct dentry *dentry); static void afs_d_iput(struct dentry *dentry, struct inode *inode); static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen, @@ -597,19 +598,19 @@ static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, * Do a lookup of a single name in a directory * - just returns the FID the dentry name maps to if found */ -static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, +static int afs_do_lookup_one(struct inode *dir, const struct qstr *name, struct afs_fid *fid, afs_dataversion_t *_dir_version) { struct afs_super_info *as = dir->i_sb->s_fs_info; struct afs_lookup_one_cookie cookie = { .ctx.actor = afs_lookup_one_filldir, - .name = dentry->d_name, + .name = *name, .fid.vid = as->volume->vid }; int ret; - _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); + _enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name); /* search the directory */ ret = afs_dir_iterate(dir, &cookie.ctx, NULL, _dir_version); @@ -1023,21 +1024,12 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, /* * Check the validity of a dentry under RCU conditions. */ -static int afs_d_revalidate_rcu(struct dentry *dentry) +static int afs_d_revalidate_rcu(struct afs_vnode *dvnode, struct dentry *dentry) { - struct afs_vnode *dvnode; - struct dentry *parent; - struct inode *dir; long dir_version, de_version; _enter("%p", dentry); - /* Check the parent directory is still valid first. */ - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - dvnode = AFS_FS_I(dir); if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) return -ECHILD; @@ -1065,11 +1057,11 @@ static int afs_d_revalidate_rcu(struct dentry *dentry) * - NOTE! 
the hit can be a negative hit too, so we can't assume we have an * inode */ -static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) +static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - struct afs_vnode *vnode, *dir; + struct afs_vnode *vnode, *dir = AFS_FS_I(parent_dir); struct afs_fid fid; - struct dentry *parent; struct inode *inode; struct key *key; afs_dataversion_t dir_version, invalid_before; @@ -1077,7 +1069,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) int ret; if (flags & LOOKUP_RCU) - return afs_d_revalidate_rcu(dentry); + return afs_d_revalidate_rcu(dir, dentry); if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); @@ -1092,14 +1084,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (IS_ERR(key)) key = NULL; - /* Hold the parent dentry so we can peer at it */ - parent = dget_parent(dentry); - dir = AFS_FS_I(d_inode(parent)); - /* validate the parent directory */ ret = afs_validate(dir, key); if (ret == -ERESTARTSYS) { - dput(parent); key_put(key); return ret; } @@ -1127,7 +1114,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) afs_stat_v(dir, n_reval); /* search the directory for this vnode */ - ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, &dir_version); + ret = afs_do_lookup_one(&dir->netfs.inode, name, &fid, &dir_version); switch (ret) { case 0: /* the filename maps to something */ @@ -1171,22 +1158,19 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) goto out_valid; default: - _debug("failed to iterate dir %pd: %d", - parent, ret); + _debug("failed to iterate parent %pd2: %d", dentry, ret); goto not_found; } out_valid: dentry->d_fsdata = (void *)(unsigned long)dir_version; out_valid_noupdate: - dput(parent); key_put(key); _leave(" = 1 [valid]"); return 1; not_found: _debug("dropping dentry %pd2", dentry); - dput(parent); key_put(key); _leave(" = 0 [bad]"); @@ -224,7 +224,7 @@ static unsigned long aio_nr; /* current system wide number of aio requests */ static unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ /*----end sysctl variables---*/ #ifdef CONFIG_SYSCTL -static struct ctl_table aio_sysctls[] = { +static const struct ctl_table aio_sysctls[] = { { .procname = "aio-nr", .data = &aio_nr, diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 672ca2c1d37d..ca755e8d1a37 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -24,7 +24,10 @@ do { \ } while (0) const char * const bch2_btree_node_flags[] = { -#define x(f) #f, + "typebit", + "typebit", + "typebit", +#define x(f) [BTREE_NODE_##f] = #f, BTREE_FLAGS() #undef x NULL diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 367231ab1980..5988219c6908 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2239,8 +2239,6 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos if (unlikely(ret)) return bkey_s_c_err(ret); - btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path); - k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u); if (!k.k) return k; @@ -2251,6 +2249,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos iter->k = u; k.k = &iter->k; + btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path); return k; } diff --git a/fs/bcachefs/btree_key_cache.c 
b/fs/bcachefs/btree_key_cache.c index 3b62296c3100..c378b97ebeca 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -291,8 +291,10 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, struct btree_path *ck_path, unsigned flags) { - if (flags & BTREE_ITER_cached_nofill) + if (flags & BTREE_ITER_cached_nofill) { + ck_path->l[0].b = NULL; return 0; + } struct bch_fs *c = trans->c; struct btree_iter iter; diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 6b79b672e0b1..2760dd9569ed 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -348,7 +348,7 @@ static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans, unsigned flags) { return bch2_journal_res_get(&trans->c->journal, &trans->journal_res, - trans->journal_u64s, flags); + trans->journal_u64s, flags, trans); } #define JSET_ENTRY_LOG_U64s 4 diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index f99ff1819597..114bf2f3879f 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -4,6 +4,7 @@ #include "compress.h" #include "error.h" #include "extents.h" +#include "io_write.h" #include "opts.h" #include "super-io.h" @@ -254,11 +255,14 @@ err: goto out; } -int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, - struct bch_extent_crc_unpacked *crc) +int bch2_bio_uncompress_inplace(struct bch_write_op *op, + struct bio *bio) { + struct bch_fs *c = op->c; + struct bch_extent_crc_unpacked *crc = &op->crc; struct bbuf data = { NULL }; size_t dst_len = crc->uncompressed_size << 9; + int ret = 0; /* bio must own its pages: */ BUG_ON(!bio->bi_vcnt); @@ -266,17 +270,26 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max || crc->compressed_size << 9 > c->opts.encoded_extent_max) { - bch_err(c, "error rewriting existing data: extent too big"); + struct printbuf buf = PRINTBUF; + bch2_write_op_error(&buf, op); + prt_printf(&buf, "error rewriting existing data: extent too big"); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); return -EIO; } data = __bounce_alloc(c, dst_len, WRITE); if (__bio_uncompress(c, bio, data.b, *crc)) { - if (!c->opts.no_data_io) - bch_err(c, "error rewriting existing data: decompression error"); - bio_unmap_or_unbounce(c, data); - return -EIO; + if (!c->opts.no_data_io) { + struct printbuf buf = PRINTBUF; + bch2_write_op_error(&buf, op); + prt_printf(&buf, "error rewriting existing data: decompression error"); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + } + ret = -EIO; + goto err; } /* @@ -293,9 +306,9 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, crc->uncompressed_size = crc->live_size; crc->offset = 0; crc->csum = (struct bch_csum) { 0, 0 }; - +err: bio_unmap_or_unbounce(c, data); - return 0; + return ret; } int bch2_bio_uncompress(struct bch_fs *c, struct bio *src, diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h index 607fd5e232c9..bec2f05bfd52 100644 --- a/fs/bcachefs/compress.h +++ b/fs/bcachefs/compress.h @@ -47,8 +47,8 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; } -int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, - struct bch_extent_crc_unpacked *); +struct bch_write_op; +int bch2_bio_uncompress_inplace(struct bch_write_op *, struct bio *); int bch2_bio_uncompress(struct bch_fs 
*, struct bio *, struct bio *, struct bvec_iter, struct bch_extent_crc_unpacked); unsigned bch2_bio_compress(struct bch_fs *, struct bio *, size_t *, diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 585214931e05..337494facac6 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -91,15 +91,28 @@ static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struc return true; } -static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k) +static noinline void trace_move_extent_finish2(struct data_update *u, + struct bkey_i *new, + struct bkey_i *insert) { - if (trace_move_extent_finish_enabled()) { - struct printbuf buf = PRINTBUF; + struct bch_fs *c = u->op.c; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&buf, c, k); - trace_move_extent_finish(c, buf.buf); - printbuf_exit(&buf); - } + prt_newline(&buf); + + bch2_data_update_to_text(&buf, u); + prt_newline(&buf); + + prt_str_indented(&buf, "new replicas:\t"); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new)); + prt_newline(&buf); + + prt_str_indented(&buf, "insert:\t"); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); + prt_newline(&buf); + + trace_move_extent_finish(c, buf.buf); + printbuf_exit(&buf); } static void trace_move_extent_fail2(struct data_update *m, @@ -372,7 +385,8 @@ restart_drop_extra_replicas: bch2_btree_iter_set_pos(&iter, next_pos); this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size); - trace_move_extent_finish2(c, bkey_i_to_s_c(&new->k_i)); + if (trace_move_extent_finish_enabled()) + trace_move_extent_finish2(m, &new->k_i, insert); } err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -525,34 +539,38 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, struct data_update_opts *data_opts) { printbuf_tabstop_push(out, 20); - prt_str(out, "rewrite ptrs:\t"); + + prt_str_indented(out, "rewrite ptrs:\t"); bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); prt_newline(out); - prt_str(out, "kill ptrs:\t"); + prt_str_indented(out, "kill ptrs:\t"); bch2_prt_u64_base2(out, data_opts->kill_ptrs); prt_newline(out); - prt_str(out, "target:\t"); + prt_str_indented(out, "target:\t"); bch2_target_to_text(out, c, data_opts->target); prt_newline(out); - prt_str(out, "compression:\t"); + prt_str_indented(out, "compression:\t"); bch2_compression_opt_to_text(out, io_opts->background_compression); prt_newline(out); - prt_str(out, "opts.replicas:\t"); + prt_str_indented(out, "opts.replicas:\t"); prt_u64(out, io_opts->data_replicas); + prt_newline(out); - prt_str(out, "extra replicas:\t"); + prt_str_indented(out, "extra replicas:\t"); prt_u64(out, data_opts->extra_replicas); } void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) { - bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); - prt_newline(out); bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); + prt_newline(out); + + prt_str_indented(out, "old key:\t"); + bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); } int bch2_extent_drop_ptrs(struct btree_trans *trans, diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index b5de52a50d10..55333e82d1fe 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -20,6 +20,7 @@ #include "extents.h" #include "fsck.h" #include "inode.h" +#include "journal_reclaim.h" #include "super.h" #include <linux/console.h> diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 3e71860f66b9..dd508d93e9fc 100644 --- 
a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -406,7 +406,7 @@ static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, op->flags & BCH_WRITE_MOVE ? "(internal move)" : ""); } -static void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op) +void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op) { __bch2_write_op_error(out, op, op->pos.offset); } @@ -873,7 +873,7 @@ static enum prep_encoded_ret { if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) return PREP_ENCODED_CHECKSUM_ERR; - if (bch2_bio_uncompress_inplace(c, bio, &op->crc)) + if (bch2_bio_uncompress_inplace(op, bio)) return PREP_ENCODED_ERR; } diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h index 5400ce94ee57..b4626013abc8 100644 --- a/fs/bcachefs/io_write.h +++ b/fs/bcachefs/io_write.h @@ -20,6 +20,8 @@ static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, enum bch_data_type, const struct bkey_i *, bool); +void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op); + #define BCH_WRITE_FLAGS() \ x(ALLOC_NOWAIT) \ x(CACHED) \ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 2cd20114b74b..cb2c3722f674 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -113,11 +113,10 @@ journal_seq_to_buf(struct journal *j, u64 seq) static void journal_pin_list_init(struct journal_entry_pin_list *p, int count) { - unsigned i; - - for (i = 0; i < ARRAY_SIZE(p->list); i++) - INIT_LIST_HEAD(&p->list[i]); - INIT_LIST_HEAD(&p->flushed); + for (unsigned i = 0; i < ARRAY_SIZE(p->unflushed); i++) + INIT_LIST_HEAD(&p->unflushed[i]); + for (unsigned i = 0; i < ARRAY_SIZE(p->flushed); i++) + INIT_LIST_HEAD(&p->flushed[i]); atomic_set(&p->count, count); p->devs.nr = 0; } @@ -601,6 +600,16 @@ out: : -BCH_ERR_journal_res_get_blocked; } +static unsigned max_dev_latency(struct bch_fs *c) +{ + u64 nsecs = 0; + + for_each_rw_member(c, ca) + nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration); + + return nsecs_to_jiffies(nsecs); +} + /* * Essentially the entry function to the journaling code. When bcachefs is doing * a btree insert, it calls this function to get the current journal write. @@ -612,17 +621,31 @@ out: * btree node write locks. */ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, - unsigned flags) + unsigned flags, + struct btree_trans *trans) { int ret; if (closure_wait_event_timeout(&j->async_wait, (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked || (flags & JOURNAL_RES_GET_NONBLOCK), - HZ * 10)) + HZ)) return ret; + if (trans) + bch2_trans_unlock_long(trans); + struct bch_fs *c = container_of(j, struct bch_fs, journal); + int remaining_wait = max(max_dev_latency(c) * 2, HZ * 10); + + remaining_wait = max(0, remaining_wait - HZ); + + if (closure_wait_event_timeout(&j->async_wait, + (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked || + (flags & JOURNAL_RES_GET_NONBLOCK), + remaining_wait)) + return ret; + struct printbuf buf = PRINTBUF; bch2_journal_debug_to_text(&buf, j); bch_err(c, "Journal stuck? 
Waited for 10 seconds...\n%s", @@ -727,7 +750,7 @@ recheck_need_open: * livelock: */ sched_annotate_sleep(); - ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); + ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL); if (ret) return ret; @@ -848,7 +871,7 @@ out: static int __bch2_journal_meta(struct journal *j) { struct journal_res res = {}; - int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); + int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL); if (ret) return ret; @@ -1602,54 +1625,3 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) __bch2_journal_debug_to_text(out, j); spin_unlock(&j->lock); } - -bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) -{ - struct journal_entry_pin_list *pin_list; - struct journal_entry_pin *pin; - - spin_lock(&j->lock); - if (!test_bit(JOURNAL_running, &j->flags)) { - spin_unlock(&j->lock); - return true; - } - - *seq = max(*seq, j->pin.front); - - if (*seq >= j->pin.back) { - spin_unlock(&j->lock); - return true; - } - - out->atomic++; - - pin_list = journal_seq_pin(j, *seq); - - prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count)); - printbuf_indent_add(out, 2); - - for (unsigned i = 0; i < ARRAY_SIZE(pin_list->list); i++) - list_for_each_entry(pin, &pin_list->list[i], list) - prt_printf(out, "\t%px %ps\n", pin, pin->flush); - - if (!list_empty(&pin_list->flushed)) - prt_printf(out, "flushed:\n"); - - list_for_each_entry(pin, &pin_list->flushed, list) - prt_printf(out, "\t%px %ps\n", pin, pin->flush); - - printbuf_indent_sub(out, 2); - - --out->atomic; - spin_unlock(&j->lock); - - return false; -} - -void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) -{ - u64 seq = 0; - - while (!bch2_journal_seq_pins_to_text(out, j, &seq)) - seq++; -} diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index cb0df0663946..dccddd5420ad 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -312,7 +312,7 @@ static inline void bch2_journal_res_put(struct journal *j, } int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *, - unsigned); + unsigned, struct btree_trans *); /* First bits for BCH_WATERMARK: */ enum journal_res_flags { @@ -368,7 +368,8 @@ static inline int journal_res_get_fast(struct journal *j, } static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res, - unsigned u64s, unsigned flags) + unsigned u64s, unsigned flags, + struct btree_trans *trans) { int ret; @@ -380,7 +381,7 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re if (journal_res_get_fast(j, res, flags)) goto out; - ret = bch2_journal_res_get_slowpath(j, res, flags); + ret = bch2_journal_res_get_slowpath(j, res, flags, trans); if (ret) return ret; out: @@ -429,8 +430,6 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u void __bch2_journal_debug_to_text(struct printbuf *, struct journal *); void bch2_journal_debug_to_text(struct printbuf *, struct journal *); -void bch2_journal_pins_to_text(struct printbuf *, struct journal *); -bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned nr); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7f2efe85a805..11c39e0c34f4 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -17,6 +17,7 @@ #include "sb-clean.h" #include "trace.h" +#include <linux/ioprio.h> #include 
<linux/string_choices.h> void bch2_journal_pos_from_member_info_set(struct bch_fs *c) @@ -1763,6 +1764,7 @@ static CLOSURE_CALLBACK(journal_write_submit) bio->bi_iter.bi_sector = ptr->offset; bio->bi_end_io = journal_write_endio; bio->bi_private = ca; + bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 0); BUG_ON(bio->bi_iter.bi_sector == ca->prev_journal_sector); ca->prev_journal_sector = bio->bi_iter.bi_sector; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 3c8242606da7..6a9cefb635d6 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -327,8 +327,10 @@ void bch2_journal_reclaim_fast(struct journal *j) popped = true; } - if (popped) + if (popped) { bch2_journal_space_available(j); + __closure_wake_up(&j->reclaim_flush_wait); + } } bool __bch2_journal_pin_put(struct journal *j, u64 seq) @@ -362,6 +364,9 @@ static inline bool __journal_pin_drop(struct journal *j, pin->seq = 0; list_del_init(&pin->list); + if (j->reclaim_flush_wait.list.first) + __closure_wake_up(&j->reclaim_flush_wait); + /* * Unpinning a journal entry may make journal_next_bucket() succeed, if * writing a new last_seq will now make another bucket available: @@ -383,11 +388,11 @@ static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn) { if (fn == bch2_btree_node_flush0 || fn == bch2_btree_node_flush1) - return JOURNAL_PIN_btree; + return JOURNAL_PIN_TYPE_btree; else if (fn == bch2_btree_key_cache_journal_flush) - return JOURNAL_PIN_key_cache; + return JOURNAL_PIN_TYPE_key_cache; else - return JOURNAL_PIN_other; + return JOURNAL_PIN_TYPE_other; } static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq, @@ -406,7 +411,12 @@ static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq, atomic_inc(&pin_list->count); pin->seq = seq; pin->flush = flush_fn; - list_add(&pin->list, &pin_list->list[type]); + + if (list_empty(&pin_list->unflushed[type]) && + j->reclaim_flush_wait.list.first) + __closure_wake_up(&j->reclaim_flush_wait); + + list_add(&pin->list, &pin_list->unflushed[type]); } void bch2_journal_pin_copy(struct journal *j, @@ -499,16 +509,15 @@ journal_get_next_pin(struct journal *j, { struct journal_entry_pin_list *pin_list; struct journal_entry_pin *ret = NULL; - unsigned i; fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) { if (*seq > seq_to_flush && !allowed_above_seq) break; - for (i = 0; i < JOURNAL_PIN_NR; i++) - if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) || - ((1U << i) & allowed_above_seq)) { - ret = list_first_entry_or_null(&pin_list->list[i], + for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++) + if (((BIT(i) & allowed_below_seq) && *seq <= seq_to_flush) || + (BIT(i) & allowed_above_seq)) { + ret = list_first_entry_or_null(&pin_list->unflushed[i], struct journal_entry_pin, list); if (ret) return ret; @@ -544,8 +553,8 @@ static size_t journal_flush_pins(struct journal *j, } if (min_key_cache) { - allowed_above |= 1U << JOURNAL_PIN_key_cache; - allowed_below |= 1U << JOURNAL_PIN_key_cache; + allowed_above |= BIT(JOURNAL_PIN_TYPE_key_cache); + allowed_below |= BIT(JOURNAL_PIN_TYPE_key_cache); } cond_resched(); @@ -553,7 +562,9 @@ static size_t journal_flush_pins(struct journal *j, j->last_flushed = jiffies; spin_lock(&j->lock); - pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq); + pin = journal_get_next_pin(j, seq_to_flush, + allowed_below, + allowed_above, &seq); if (pin) { BUG_ON(j->flush_in_progress); j->flush_in_progress = pin; @@ -576,7 +587,7 @@ 
static size_t journal_flush_pins(struct journal *j, spin_lock(&j->lock); /* Pin might have been dropped or rearmed: */ if (likely(!err && !j->flush_in_progress_dropped)) - list_move(&pin->list, &journal_seq_pin(j, seq)->flushed); + list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]); j->flush_in_progress = NULL; j->flush_in_progress_dropped = false; spin_unlock(&j->lock); @@ -816,10 +827,41 @@ int bch2_journal_reclaim_start(struct journal *j) return 0; } +static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush, + unsigned types) +{ + struct journal_entry_pin_list *pin_list; + u64 seq; + + spin_lock(&j->lock); + fifo_for_each_entry_ptr(pin_list, &j->pin, seq) { + if (seq > seq_to_flush) + break; + + for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++) + if ((BIT(i) & types) && + (!list_empty(&pin_list->unflushed[i]) || + !list_empty(&pin_list->flushed[i]))) { + spin_unlock(&j->lock); + return true; + } + } + spin_unlock(&j->lock); + + return false; +} + +static bool journal_flush_pins_or_still_flushing(struct journal *j, u64 seq_to_flush, + unsigned types) +{ + return journal_flush_pins(j, seq_to_flush, types, 0, 0, 0) || + journal_pins_still_flushing(j, seq_to_flush, types); +} + static int journal_flush_done(struct journal *j, u64 seq_to_flush, bool *did_work) { - int ret; + int ret = 0; ret = bch2_journal_error(j); if (ret) @@ -827,12 +869,18 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, mutex_lock(&j->reclaim_lock); - if (journal_flush_pins(j, seq_to_flush, - (1U << JOURNAL_PIN_key_cache)| - (1U << JOURNAL_PIN_other), 0, 0, 0) || - journal_flush_pins(j, seq_to_flush, - (1U << JOURNAL_PIN_btree), 0, 0, 0)) + if (journal_flush_pins_or_still_flushing(j, seq_to_flush, + BIT(JOURNAL_PIN_TYPE_key_cache)| + BIT(JOURNAL_PIN_TYPE_other))) { + *did_work = true; + goto unlock; + } + + if (journal_flush_pins_or_still_flushing(j, seq_to_flush, + BIT(JOURNAL_PIN_TYPE_btree))) { *did_work = true; + goto unlock; + } if (seq_to_flush > journal_cur_seq(j)) bch2_journal_entry_close(j); @@ -847,6 +895,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, !fifo_used(&j->pin); spin_unlock(&j->lock); +unlock: mutex_unlock(&j->reclaim_lock); return ret; @@ -860,7 +909,7 @@ bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) if (!test_bit(JOURNAL_running, &j->flags)) return false; - closure_wait_event(&j->async_wait, + closure_wait_event(&j->reclaim_flush_wait, journal_flush_done(j, seq_to_flush, &did_work)); return did_work; @@ -926,3 +975,54 @@ err: return ret; } + +bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) +{ + struct journal_entry_pin_list *pin_list; + struct journal_entry_pin *pin; + + spin_lock(&j->lock); + if (!test_bit(JOURNAL_running, &j->flags)) { + spin_unlock(&j->lock); + return true; + } + + *seq = max(*seq, j->pin.front); + + if (*seq >= j->pin.back) { + spin_unlock(&j->lock); + return true; + } + + out->atomic++; + + pin_list = journal_seq_pin(j, *seq); + + prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count)); + printbuf_indent_add(out, 2); + + prt_printf(out, "unflushed:\n"); + for (unsigned i = 0; i < ARRAY_SIZE(pin_list->unflushed); i++) + list_for_each_entry(pin, &pin_list->unflushed[i], list) + prt_printf(out, "\t%px %ps\n", pin, pin->flush); + + prt_printf(out, "flushed:\n"); + for (unsigned i = 0; i < ARRAY_SIZE(pin_list->flushed); i++) + list_for_each_entry(pin, &pin_list->flushed[i], list) + prt_printf(out, "\t%px 
%ps\n", pin, pin->flush); + + printbuf_indent_sub(out, 2); + + --out->atomic; + spin_unlock(&j->lock); + + return false; +} + +void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) +{ + u64 seq = 0; + + while (!bch2_journal_seq_pins_to_text(out, j, &seq)) + seq++; +} diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h index ec84c3345281..0a73d7134e1c 100644 --- a/fs/bcachefs/journal_reclaim.h +++ b/fs/bcachefs/journal_reclaim.h @@ -78,4 +78,7 @@ static inline bool bch2_journal_flush_all_pins(struct journal *j) int bch2_journal_flush_device_pins(struct journal *, int); +void bch2_journal_pins_to_text(struct printbuf *, struct journal *); +bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); + #endif /* _BCACHEFS_JOURNAL_RECLAIM_H */ diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index e9bd716fbb71..3ba433a48eb8 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -53,15 +53,15 @@ struct journal_buf { */ enum journal_pin_type { - JOURNAL_PIN_btree, - JOURNAL_PIN_key_cache, - JOURNAL_PIN_other, - JOURNAL_PIN_NR, + JOURNAL_PIN_TYPE_btree, + JOURNAL_PIN_TYPE_key_cache, + JOURNAL_PIN_TYPE_other, + JOURNAL_PIN_TYPE_NR, }; struct journal_entry_pin_list { - struct list_head list[JOURNAL_PIN_NR]; - struct list_head flushed; + struct list_head unflushed[JOURNAL_PIN_TYPE_NR]; + struct list_head flushed[JOURNAL_PIN_TYPE_NR]; atomic_t count; struct bch_devs_list devs; }; @@ -226,6 +226,7 @@ struct journal { /* Used when waiting because the journal was full */ wait_queue_head_t wait; struct closure_waitlist async_wait; + struct closure_waitlist reclaim_flush_wait; struct delayed_work write_work; struct workqueue_struct *wq; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 85c361e78ba5..21805509ab9e 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -215,7 +215,8 @@ static int bch2_copygc(struct moving_context *ctxt, }; move_buckets buckets = { 0 }; struct move_bucket_in_flight *f; - u64 moved = atomic64_read(&ctxt->stats->sectors_moved); + u64 sectors_seen = atomic64_read(&ctxt->stats->sectors_seen); + u64 sectors_moved = atomic64_read(&ctxt->stats->sectors_moved); int ret = 0; ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets); @@ -245,7 +246,6 @@ static int bch2_copygc(struct moving_context *ctxt, *did_work = true; } err: - darray_exit(&buckets); /* no entries in LRU btree found, or got to end: */ if (bch2_err_matches(ret, ENOENT)) @@ -254,8 +254,11 @@ err: if (ret < 0 && !bch2_err_matches(ret, EROFS)) bch_err_msg(c, ret, "from bch2_move_data()"); - moved = atomic64_read(&ctxt->stats->sectors_moved) - moved; - trace_and_count(c, copygc, c, moved, 0, 0, 0); + sectors_seen = atomic64_read(&ctxt->stats->sectors_seen) - sectors_seen; + sectors_moved = atomic64_read(&ctxt->stats->sectors_moved) - sectors_moved; + trace_and_count(c, copygc, c, buckets.nr, sectors_seen, sectors_moved); + + darray_exit(&buckets); return ret; } diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index e763d52e0f38..a182b5d454ba 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -476,13 +476,13 @@ enum fsck_err_opts { NULL, "Enable nocow mode: enables runtime locking in\n"\ "data move path needed if nocow will ever be in use\n")\ x(copygc_enabled, u8, \ - OPT_FS|OPT_MOUNT, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ BCH2_NO_SB_OPT, true, \ NULL, "Enable copygc: disable for debugging, or to\n"\ "quiet the system when doing performance testing\n")\ 
x(rebalance_enabled, u8, \ - OPT_FS|OPT_MOUNT, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ BCH2_NO_SB_OPT, true, \ NULL, "Enable rebalance: disable for debugging, or to\n"\ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 0b4fe899209b..ea0a18364751 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -57,7 +57,7 @@ enum bch_fsck_flags { x(bset_wrong_sector_offset, 44, 0) \ x(bset_empty, 45, 0) \ x(bset_bad_seq, 46, 0) \ - x(bset_blacklisted_journal_seq, 47, 0) \ + x(bset_blacklisted_journal_seq, 47, FSCK_AUTOFIX) \ x(first_bset_blacklisted_journal_seq, 48, FSCK_AUTOFIX) \ x(btree_node_bad_btree, 49, 0) \ x(btree_node_bad_level, 50, 0) \ diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c index 8c2c5539de2e..d78451c2a0c6 100644 --- a/fs/bcachefs/str_hash.c +++ b/fs/bcachefs/str_hash.c @@ -31,11 +31,11 @@ static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dir } } -static int fsck_rename_dirent(struct btree_trans *trans, - struct snapshots_seen *s, - const struct bch_hash_desc desc, - struct bch_hash_info *hash_info, - struct bkey_s_c_dirent old) +static noinline int fsck_rename_dirent(struct btree_trans *trans, + struct snapshots_seen *s, + const struct bch_hash_desc desc, + struct bch_hash_info *hash_info, + struct bkey_s_c_dirent old) { struct qstr old_name = bch2_dirent_get_name(old); struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32); @@ -71,11 +71,11 @@ static int fsck_rename_dirent(struct btree_trans *trans, return bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); } -static int hash_pick_winner(struct btree_trans *trans, - const struct bch_hash_desc desc, - struct bch_hash_info *hash_info, - struct bkey_s_c k1, - struct bkey_s_c k2) +static noinline int hash_pick_winner(struct btree_trans *trans, + const struct bch_hash_desc desc, + struct bch_hash_info *hash_info, + struct bkey_s_c k1, + struct bkey_s_c k2) { if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) && !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k))) @@ -142,8 +142,8 @@ fsck_err: * All versions of the same inode in different snapshots must have the same hash * seed/type: verify that the hash info we're using matches the root */ -static int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum, - struct bch_hash_info *hash_info) +static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum, + struct bch_hash_info *hash_info) { struct bch_fs *c = trans->c; struct btree_iter iter; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 9d40b7d4ea29..56a5a7fbc0fd 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -902,32 +902,30 @@ TRACE_EVENT(evacuate_bucket, TRACE_EVENT(copygc, TP_PROTO(struct bch_fs *c, - u64 sectors_moved, u64 sectors_not_moved, - u64 buckets_moved, u64 buckets_not_moved), - TP_ARGS(c, - sectors_moved, sectors_not_moved, - buckets_moved, buckets_not_moved), + u64 buckets, + u64 sectors_seen, + u64 sectors_moved), + TP_ARGS(c, buckets, sectors_seen, sectors_moved), TP_STRUCT__entry( __field(dev_t, dev ) + __field(u64, buckets ) + __field(u64, sectors_seen ) __field(u64, sectors_moved ) - __field(u64, sectors_not_moved ) - __field(u64, buckets_moved ) - __field(u64, buckets_not_moved ) ), TP_fast_assign( __entry->dev = c->dev; + __entry->buckets = buckets; + __entry->sectors_seen = sectors_seen; __entry->sectors_moved = sectors_moved; - __entry->sectors_not_moved = sectors_not_moved; - 
__entry->buckets_moved = buckets_moved; - __entry->buckets_not_moved = buckets_moved; ), - TP_printk("%d,%d sectors moved %llu remain %llu buckets moved %llu remain %llu", + TP_printk("%d,%d buckets %llu sectors seen %llu moved %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->sectors_moved, __entry->sectors_not_moved, - __entry->buckets_moved, __entry->buckets_not_moved) + __entry->buckets, + __entry->sectors_seen, + __entry->sectors_moved) ); TRACE_EVENT(copygc_wait, diff --git a/fs/cachefiles/error_inject.c b/fs/cachefiles/error_inject.c index 1715d5ca2b2d..e341ade47dd8 100644 --- a/fs/cachefiles/error_inject.c +++ b/fs/cachefiles/error_inject.c @@ -11,7 +11,7 @@ unsigned int cachefiles_error_injection_state; static struct ctl_table_header *cachefiles_sysctl; -static struct ctl_table cachefiles_sysctls[] = { +static const struct ctl_table cachefiles_sysctls[] = { { .procname = "error_injection", .data = &cachefiles_error_injection_state, diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fdf9dc15eafa..fdd404fc8112 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -412,7 +412,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { - char name[100]; + char name[NAME_MAX]; doutc(fsc->client, "begin\n"); fsc->debugfs_congestion_kb = diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 0bf388e07a02..62e99e65250d 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1940,29 +1940,19 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, /* * Check if cached dentry can be trusted. */ -static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) +static int ceph_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dentry->d_sb)->mdsc; struct ceph_client *cl = mdsc->fsc->client; int valid = 0; - struct dentry *parent; - struct inode *dir, *inode; + struct inode *inode; - valid = fscrypt_d_revalidate(dentry, flags); + valid = fscrypt_d_revalidate(dir, name, dentry, flags); if (valid <= 0) return valid; - if (flags & LOOKUP_RCU) { - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - inode = d_inode_rcu(dentry); - } else { - parent = dget_parent(dentry); - dir = d_inode(parent); - inode = d_inode(dentry); - } + inode = d_inode_rcu(dentry); doutc(cl, "%p '%pd' inode %p offset 0x%llx nokey %d\n", dentry, dentry, inode, ceph_dentry(dentry)->offset, @@ -2008,6 +1998,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) req->r_parent = dir; ihold(dir); + req->r_dname = name; + mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; if (ceph_security_xattr_wanted(dir)) mask |= CEPH_CAP_XATTR_SHARED; @@ -2038,9 +2030,6 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) doutc(cl, "%p '%pd' %s\n", dentry, dentry, valid ? 
"valid" : "invalid"); if (!valid) ceph_dir_clear_complete(dir); - - if (!(flags & LOOKUP_RCU)) - dput(parent); return valid; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 785fe489ef4b..54b3421501e9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2621,6 +2621,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) { struct inode *dir = req->r_parent; struct dentry *dentry = req->r_dentry; + const struct qstr *name = req->r_dname; u8 *cryptbuf = NULL; u32 len = 0; int ret = 0; @@ -2641,8 +2642,10 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) if (!fscrypt_has_encryption_key(dir)) goto success; - if (!fscrypt_fname_encrypted_size(dir, dentry->d_name.len, NAME_MAX, - &len)) { + if (!name) + name = &dentry->d_name; + + if (!fscrypt_fname_encrypted_size(dir, name->len, NAME_MAX, &len)) { WARN_ON_ONCE(1); return ERR_PTR(-ENAMETOOLONG); } @@ -2657,7 +2660,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) if (!cryptbuf) return ERR_PTR(-ENOMEM); - ret = fscrypt_fname_encrypt(dir, &dentry->d_name, cryptbuf, len); + ret = fscrypt_fname_encrypt(dir, name, cryptbuf, len); if (ret) { kfree(cryptbuf); return ERR_PTR(ret); @@ -2945,12 +2948,12 @@ static struct ceph_mds_request_head_legacy * find_legacy_request_head(void *p, u64 features) { bool legacy = !(features & CEPH_FEATURE_FS_BTIME); - struct ceph_mds_request_head_old *ohead; + struct ceph_mds_request_head *head; if (legacy) return (struct ceph_mds_request_head_legacy *)p; - ohead = (struct ceph_mds_request_head_old *)p; - return (struct ceph_mds_request_head_legacy *)&ohead->oldest_client_tid; + head = (struct ceph_mds_request_head *)p; + return (struct ceph_mds_request_head_legacy *)&head->oldest_client_tid; } /* @@ -3020,7 +3023,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (legacy) len = sizeof(struct ceph_mds_request_head_legacy); else if (request_head_version == 1) - len = sizeof(struct ceph_mds_request_head_old); + len = offsetofend(struct ceph_mds_request_head, args); else if (request_head_version == 2) len = offsetofend(struct ceph_mds_request_head, ext_num_fwd); else @@ -3104,11 +3107,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, msg->hdr.version = cpu_to_le16(3); p = msg->front.iov_base + sizeof(*lhead); } else if (request_head_version == 1) { - struct ceph_mds_request_head_old *ohead = msg->front.iov_base; + struct ceph_mds_request_head *nhead = msg->front.iov_base; msg->hdr.version = cpu_to_le16(4); - ohead->version = cpu_to_le16(1); - p = msg->front.iov_base + sizeof(*ohead); + nhead->version = cpu_to_le16(1); + p = msg->front.iov_base + offsetofend(struct ceph_mds_request_head, args); } else if (request_head_version == 2) { struct ceph_mds_request_head *nhead = msg->front.iov_base; @@ -3265,7 +3268,7 @@ static int __prepare_send_request(struct ceph_mds_session *session, * so we limit to retry at most 256 times. 
*/ if (req->r_attempts) { - old_max_retry = sizeof_field(struct ceph_mds_request_head_old, + old_max_retry = sizeof_field(struct ceph_mds_request_head, num_retry); old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE); if ((old_version && req->r_attempts >= old_max_retry) || @@ -5690,18 +5693,18 @@ static int ceph_mds_auth_match(struct ceph_mds_client *mdsc, * * All the other cases --> mismatch */ + bool path_matched = true; char *first = strstr(_tpath, auth->match.path); - if (first != _tpath) { - if (free_tpath) - kfree(_tpath); - return 0; + if (first != _tpath || + (tlen > len && _tpath[len] != '/')) { + path_matched = false; } - if (tlen > len && _tpath[len] != '/') { - if (free_tpath) - kfree(_tpath); + if (free_tpath) + kfree(_tpath); + + if (!path_matched) return 0; - } } } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 38bb7e0d2d79..7c9fee9e80d4 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -299,6 +299,8 @@ struct ceph_mds_request { struct inode *r_target_inode; /* resulting inode */ struct inode *r_new_inode; /* new inode (for creates) */ + const struct qstr *r_dname; /* stable name (for ->d_revalidate) */ + #define CEPH_MDS_R_DIRECT_IS_HASH (1) /* r_direct_hash is valid */ #define CEPH_MDS_R_ABORTED (2) /* call was aborted */ #define CEPH_MDS_R_GOT_UNSAFE (3) /* got an unsafe reply */ diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 4e552ba7bd43..a3e2dfeedfbf 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -445,7 +445,8 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx) } /* called when a cache lookup succeeds */ -static int coda_dentry_revalidate(struct dentry *de, unsigned int flags) +static int coda_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *de, unsigned int flags) { struct inode *inode; struct coda_inode_info *cii; diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c index 9f2d5743e2c8..0df46f09b6cc 100644 --- a/fs/coda/sysctl.c +++ b/fs/coda/sysctl.c @@ -14,7 +14,7 @@ static struct ctl_table_header *fs_table_header; -static struct ctl_table coda_table[] = { +static const struct ctl_table coda_table[] = { { .procname = "timeout", .data = &coda_timeout, diff --git a/fs/coredump.c b/fs/coredump.c index d48edb37bc35..591700e1b2ce 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -995,7 +995,7 @@ static int proc_dostring_coredump(const struct ctl_table *table, int write, static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT; static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX; -static struct ctl_table coredump_sysctls[] = { +static const struct ctl_table coredump_sysctls[] = { { .procname = "core_uses_pid", .data = &core_uses_pid, diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 0ad52fbe51c9..010f9c0a4c2f 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -574,11 +574,10 @@ EXPORT_SYMBOL_GPL(fscrypt_fname_siphash); * Validate dentries in encrypted directories to make sure we aren't potentially * caching stale dentries after a key has been added. */ -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) +int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - struct dentry *dir; int err; - int valid; /* * Plaintext names are always valid, since fscrypt doesn't support @@ -591,30 +590,21 @@ int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) /* * No-key name; valid if the directory's key is still unavailable. 
* - * Although fscrypt forbids rename() on no-key names, we still must use - * dget_parent() here rather than use ->d_parent directly. That's - * because a corrupted fs image may contain directory hard links, which - * the VFS handles by moving the directory's dentry tree in the dcache - * each time ->lookup() finds the directory and it already has a dentry - * elsewhere. Thus ->d_parent can be changing, and we must safely grab - * a reference to some ->d_parent to prevent it from being freed. + * Note in RCU mode we have to bail if we get here - + * fscrypt_get_encryption_info() may block. */ if (flags & LOOKUP_RCU) return -ECHILD; - dir = dget_parent(dentry); /* * Pass allow_unsupported=true, so that files with an unsupported * encryption policy can be deleted. */ - err = fscrypt_get_encryption_info(d_inode(dir), true); - valid = !fscrypt_has_encryption_key(d_inode(dir)); - dput(dir); - + err = fscrypt_get_encryption_info(dir, true); if (err < 0) return err; - return valid; + return !fscrypt_has_encryption_key(dir); } EXPORT_SYMBOL_GPL(fscrypt_d_revalidate); diff --git a/fs/dcache.c b/fs/dcache.c index 1a01d7a6a7a9..9cc0d47da321 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -192,7 +192,7 @@ static int proc_nr_dentry(const struct ctl_table *table, int write, void *buffer return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } -static struct ctl_table fs_dcache_sysctls[] = { +static const struct ctl_table fs_dcache_sysctls[] = { { .procname = "dentry-state", .data = &dentry_stat, @@ -295,12 +295,16 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c return dentry_string_cmp(cs, ct, tcount); } +/* + * long names are allocated separately from dentry and never modified. + * Refcounted, freeing is RCU-delayed. See take_dentry_name_snapshot() + * for the reason why ->count and ->head can't be combined into a union. + * dentry_string_cmp() relies upon ->name[] being word-aligned. 
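The ->d_revalidate conversions above (ceph, coda, fscrypt) and the ones further below (ecryptfs, exfat, vfat) all move to the same new prototype: the VFS passes the parent directory inode and the name used for the lookup, so implementations no longer need the dget_parent()/READ_ONCE(->d_parent) dance. A minimal skeleton under the new signature, hedged as an illustration rather than any particular filesystem (example_name_still_valid() is made up):

static int example_d_revalidate(struct inode *dir, const struct qstr *name,
                                struct dentry *dentry, unsigned int flags)
{
        if (flags & LOOKUP_RCU)
                return -ECHILD;                 /* the check below may sleep */

        /* 'dir' and 'name' are provided by the caller and stay stable
         * for the duration of the call. */
        if (!example_name_still_valid(dir, name))
                return 0;                       /* stale: drop the dentry */

        return 1;                               /* still valid */
}

static const struct dentry_operations example_dentry_ops = {
        .d_revalidate   = example_d_revalidate,
};
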
+ */ struct external_name { - union { - atomic_t count; - struct rcu_head head; - } u; - unsigned char name[]; + atomic_t count; + struct rcu_head head; + unsigned char name[] __aligned(sizeof(unsigned long)); }; static inline struct external_name *external_name(struct dentry *dentry) @@ -324,31 +328,45 @@ static void __d_free_external(struct rcu_head *head) static inline int dname_external(const struct dentry *dentry) { - return dentry->d_name.name != dentry->d_iname; + return dentry->d_name.name != dentry->d_shortname.string; } void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry) { - spin_lock(&dentry->d_lock); - name->name = dentry->d_name; - if (unlikely(dname_external(dentry))) { - atomic_inc(&external_name(dentry)->u.count); + unsigned seq; + const unsigned char *s; + + rcu_read_lock(); +retry: + seq = read_seqcount_begin(&dentry->d_seq); + s = READ_ONCE(dentry->d_name.name); + name->name.hash_len = dentry->d_name.hash_len; + name->name.name = name->inline_name.string; + if (likely(s == dentry->d_shortname.string)) { + name->inline_name = dentry->d_shortname; } else { - memcpy(name->inline_name, dentry->d_iname, - dentry->d_name.len + 1); - name->name.name = name->inline_name; + struct external_name *p; + p = container_of(s, struct external_name, name[0]); + // get a valid reference + if (unlikely(!atomic_inc_not_zero(&p->count))) + goto retry; + name->name.name = s; } - spin_unlock(&dentry->d_lock); + if (read_seqcount_retry(&dentry->d_seq, seq)) { + release_dentry_name_snapshot(name); + goto retry; + } + rcu_read_unlock(); } EXPORT_SYMBOL(take_dentry_name_snapshot); void release_dentry_name_snapshot(struct name_snapshot *name) { - if (unlikely(name->name.name != name->inline_name)) { + if (unlikely(name->name.name != name->inline_name.string)) { struct external_name *p; p = container_of(name->name.name, struct external_name, name[0]); - if (unlikely(atomic_dec_and_test(&p->u.count))) - kfree_rcu(p, u.head); + if (unlikely(atomic_dec_and_test(&p->count))) + kfree_rcu(p, head); } } EXPORT_SYMBOL(release_dentry_name_snapshot); @@ -386,7 +404,7 @@ static void dentry_free(struct dentry *dentry) WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); if (unlikely(dname_external(dentry))) { struct external_name *p = external_name(dentry); - if (likely(atomic_dec_and_test(&p->u.count))) { + if (likely(atomic_dec_and_test(&p->count))) { call_rcu(&dentry->d_u.d_rcu, __d_free_external); return; } @@ -1654,10 +1672,10 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) * will still always have a NUL at the end, even if we might * be overwriting an internal NUL character */ - dentry->d_iname[DNAME_INLINE_LEN-1] = 0; + dentry->d_shortname.string[DNAME_INLINE_LEN-1] = 0; if (unlikely(!name)) { name = &slash_name; - dname = dentry->d_iname; + dname = dentry->d_shortname.string; } else if (name->len > DNAME_INLINE_LEN-1) { size_t size = offsetof(struct external_name, name[1]); struct external_name *p = kmalloc(size + name->len, @@ -1667,10 +1685,10 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) kmem_cache_free(dentry_cache, dentry); return NULL; } - atomic_set(&p->u.count, 1); + atomic_set(&p->count, 1); dname = p->name; } else { - dname = dentry->d_iname; + dname = dentry->d_shortname.string; } dentry->d_name.len = name->len; @@ -2728,10 +2746,9 @@ static void swap_names(struct dentry *dentry, struct dentry *target) * dentry:internal, target:external. Steal target's * storage and make target internal. 
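take_dentry_name_snapshot() above is an instance of the lockless seqcount read pattern: sample the fields under ->d_seq, try to pin the external name with atomic_inc_not_zero(), and start over if the sequence count moved. Stripped of the dcache specifics, the reader side looks like this (obj and its fields are stand-ins):

unsigned int seq;
u64 a, b;

rcu_read_lock();        /* keeps the referenced object from being freed */
do {
        seq = read_seqcount_begin(&obj->seq);
        a = obj->field_a;       /* may race with a writer ...             */
        b = obj->field_b;
} while (read_seqcount_retry(&obj->seq, seq));  /* ... so retry if it did */
rcu_read_unlock();
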
*/ - memcpy(target->d_iname, dentry->d_name.name, - dentry->d_name.len + 1); dentry->d_name.name = target->d_name.name; - target->d_name.name = target->d_iname; + target->d_shortname = dentry->d_shortname; + target->d_name.name = target->d_shortname.string; } } else { if (unlikely(dname_external(dentry))) { @@ -2739,20 +2756,16 @@ static void swap_names(struct dentry *dentry, struct dentry *target) * dentry:external, target:internal. Give dentry's * storage to target and make dentry internal */ - memcpy(dentry->d_iname, target->d_name.name, - target->d_name.len + 1); target->d_name.name = dentry->d_name.name; - dentry->d_name.name = dentry->d_iname; + dentry->d_shortname = target->d_shortname; + dentry->d_name.name = dentry->d_shortname.string; } else { /* * Both are internal. */ - unsigned int i; - BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); - for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { - swap(((long *) &dentry->d_iname)[i], - ((long *) &target->d_iname)[i]); - } + for (int i = 0; i < DNAME_INLINE_WORDS; i++) + swap(dentry->d_shortname.words[i], + target->d_shortname.words[i]); } } swap(dentry->d_name.hash_len, target->d_name.hash_len); @@ -2764,16 +2777,15 @@ static void copy_name(struct dentry *dentry, struct dentry *target) if (unlikely(dname_external(dentry))) old_name = external_name(dentry); if (unlikely(dname_external(target))) { - atomic_inc(&external_name(target)->u.count); + atomic_inc(&external_name(target)->count); dentry->d_name = target->d_name; } else { - memcpy(dentry->d_iname, target->d_name.name, - target->d_name.len + 1); - dentry->d_name.name = dentry->d_iname; + dentry->d_shortname = target->d_shortname; + dentry->d_name.name = dentry->d_shortname.string; dentry->d_name.hash_len = target->d_name.hash_len; } - if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) - kfree_rcu(old_name, u.head); + if (old_name && likely(atomic_dec_and_test(&old_name->count))) + kfree_rcu(old_name, head); } /* @@ -2954,7 +2966,12 @@ static int __d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: + if (alias->d_op->d_unalias_trylock && + !alias->d_op->d_unalias_trylock(alias)) + goto out_err; __d_move(alias, dentry, false); + if (alias->d_op->d_unalias_unlock) + alias->d_op->d_unalias_unlock(alias); ret = 0; out_err: if (m2) @@ -3102,12 +3119,12 @@ void d_mark_tmpfile(struct file *file, struct inode *inode) { struct dentry *dentry = file->f_path.dentry; - BUG_ON(dentry->d_name.name != dentry->d_iname || + BUG_ON(dname_external(dentry) || !hlist_unhashed(&dentry->d_u.d_alias) || !d_unlinked(dentry)); spin_lock(&dentry->d_parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - dentry->d_name.len = sprintf(dentry->d_iname, "#%llu", + dentry->d_name.len = sprintf(dentry->d_shortname.string, "#%llu", (unsigned long long)inode->i_ino); spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_parent->d_lock); @@ -3195,7 +3212,7 @@ static void __init dcache_init(void) */ dentry_cache = KMEM_CACHE_USERCOPY(dentry, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_ACCOUNT, - d_iname); + d_shortname.string); /* Hash may have been set up in dcache_init_early */ if (!hashdist) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 16e198a26339..e33cc77699cd 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -47,11 +47,17 @@ const struct file_operations debugfs_noop_file_operations = { #define F_DENTRY(filp) ((filp)->f_path.dentry) +const void *debugfs_get_aux(const struct file 
*file) +{ + return DEBUGFS_I(file_inode(file))->aux; +} +EXPORT_SYMBOL_GPL(debugfs_get_aux); + const struct file_operations *debugfs_real_fops(const struct file *filp) { struct debugfs_fsdata *fsd = F_DENTRY(filp)->d_fsdata; - if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) { + if (!fsd) { /* * Urgh, we've been called w/o a protecting * debugfs_file_get(). @@ -84,9 +90,11 @@ static int __debugfs_file_get(struct dentry *dentry, enum dbgfs_get_mode mode) return -EINVAL; d_fsd = READ_ONCE(dentry->d_fsdata); - if (!((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) { + if (d_fsd) { fsd = d_fsd; } else { + struct inode *inode = dentry->d_inode; + if (WARN_ON(mode == DBGFS_GET_ALREADY)) return -EINVAL; @@ -95,23 +103,38 @@ static int __debugfs_file_get(struct dentry *dentry, enum dbgfs_get_mode mode) return -ENOMEM; if (mode == DBGFS_GET_SHORT) { - fsd->real_fops = NULL; - fsd->short_fops = (void *)((unsigned long)d_fsd & - ~DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); + const struct debugfs_short_fops *ops; + ops = fsd->short_fops = DEBUGFS_I(inode)->short_fops; + if (ops->llseek) + fsd->methods |= HAS_LSEEK; + if (ops->read) + fsd->methods |= HAS_READ; + if (ops->write) + fsd->methods |= HAS_WRITE; } else { - fsd->real_fops = (void *)((unsigned long)d_fsd & - ~DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); - fsd->short_fops = NULL; + const struct file_operations *ops; + ops = fsd->real_fops = DEBUGFS_I(inode)->real_fops; + if (ops->llseek) + fsd->methods |= HAS_LSEEK; + if (ops->read) + fsd->methods |= HAS_READ; + if (ops->write) + fsd->methods |= HAS_WRITE; + if (ops->unlocked_ioctl) + fsd->methods |= HAS_IOCTL; + if (ops->poll) + fsd->methods |= HAS_POLL; } refcount_set(&fsd->active_users, 1); init_completion(&fsd->active_users_drained); INIT_LIST_HEAD(&fsd->cancellations); mutex_init(&fsd->cancellations_mtx); - if (cmpxchg(&dentry->d_fsdata, d_fsd, fsd) != d_fsd) { + d_fsd = cmpxchg(&dentry->d_fsdata, NULL, fsd); + if (d_fsd) { mutex_destroy(&fsd->cancellations_mtx); kfree(fsd); - fsd = READ_ONCE(dentry->d_fsdata); + fsd = d_fsd; } } @@ -208,8 +231,7 @@ void debugfs_enter_cancellation(struct file *file, return; fsd = READ_ONCE(dentry->d_fsdata); - if (WARN_ON(!fsd || - ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) + if (WARN_ON(!fsd)) return; mutex_lock(&fsd->cancellations_mtx); @@ -240,8 +262,7 @@ void debugfs_leave_cancellation(struct file *file, return; fsd = READ_ONCE(dentry->d_fsdata); - if (WARN_ON(!fsd || - ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) + if (WARN_ON(!fsd)) return; mutex_lock(&fsd->cancellations_mtx); @@ -322,13 +343,16 @@ const struct file_operations debugfs_open_proxy_file_operations = { #define PROTO(args...) args #define ARGS(args...) 
args -#define FULL_PROXY_FUNC(name, ret_type, filp, proto, args) \ +#define FULL_PROXY_FUNC(name, ret_type, filp, proto, args, bit, ret) \ static ret_type full_proxy_ ## name(proto) \ { \ - struct dentry *dentry = F_DENTRY(filp); \ + struct dentry *dentry = F_DENTRY(filp); \ + struct debugfs_fsdata *fsd = dentry->d_fsdata; \ const struct file_operations *real_fops; \ ret_type r; \ \ + if (!(fsd->methods & bit)) \ + return ret; \ r = debugfs_file_get(dentry); \ if (unlikely(r)) \ return r; \ @@ -338,17 +362,18 @@ static ret_type full_proxy_ ## name(proto) \ return r; \ } -#define FULL_PROXY_FUNC_BOTH(name, ret_type, filp, proto, args) \ +#define FULL_PROXY_FUNC_BOTH(name, ret_type, filp, proto, args, bit, ret) \ static ret_type full_proxy_ ## name(proto) \ { \ struct dentry *dentry = F_DENTRY(filp); \ - struct debugfs_fsdata *fsd; \ + struct debugfs_fsdata *fsd = dentry->d_fsdata; \ ret_type r; \ \ + if (!(fsd->methods & bit)) \ + return ret; \ r = debugfs_file_get(dentry); \ if (unlikely(r)) \ return r; \ - fsd = dentry->d_fsdata; \ if (fsd->real_fops) \ r = fsd->real_fops->name(args); \ else \ @@ -359,29 +384,32 @@ static ret_type full_proxy_ ## name(proto) \ FULL_PROXY_FUNC_BOTH(llseek, loff_t, filp, PROTO(struct file *filp, loff_t offset, int whence), - ARGS(filp, offset, whence)); + ARGS(filp, offset, whence), HAS_LSEEK, -ESPIPE); FULL_PROXY_FUNC_BOTH(read, ssize_t, filp, PROTO(struct file *filp, char __user *buf, size_t size, loff_t *ppos), - ARGS(filp, buf, size, ppos)); + ARGS(filp, buf, size, ppos), HAS_READ, -EINVAL); FULL_PROXY_FUNC_BOTH(write, ssize_t, filp, PROTO(struct file *filp, const char __user *buf, size_t size, loff_t *ppos), - ARGS(filp, buf, size, ppos)); + ARGS(filp, buf, size, ppos), HAS_WRITE, -EINVAL); FULL_PROXY_FUNC(unlocked_ioctl, long, filp, PROTO(struct file *filp, unsigned int cmd, unsigned long arg), - ARGS(filp, cmd, arg)); + ARGS(filp, cmd, arg), HAS_IOCTL, -ENOTTY); static __poll_t full_proxy_poll(struct file *filp, struct poll_table_struct *wait) { struct dentry *dentry = F_DENTRY(filp); + struct debugfs_fsdata *fsd = dentry->d_fsdata; __poll_t r = 0; const struct file_operations *real_fops; + if (!(fsd->methods & HAS_POLL)) + return DEFAULT_POLLMASK; if (debugfs_file_get(dentry)) return EPOLLHUP; @@ -393,9 +421,7 @@ static __poll_t full_proxy_poll(struct file *filp, static int full_proxy_release(struct inode *inode, struct file *filp) { - const struct dentry *dentry = F_DENTRY(filp); const struct file_operations *real_fops = debugfs_real_fops(filp); - const struct file_operations *proxy_fops = filp->f_op; int r = 0; /* @@ -404,49 +430,21 @@ static int full_proxy_release(struct inode *inode, struct file *filp) * not to leak any resources. Releasers must not assume that * ->i_private is still being meaningful here. 
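In these debugfs/file.c hunks the per-file kmalloc'd proxy fops goes away: __debugfs_file_get() records which operations the real fops implements in a small bitmask, and each shared proxy method checks its bit up front, returning the conventional errno (-ESPIPE, -EINVAL, -ENOTTY) instead of dispatching. A compact sketch of that precompute-then-check idea, with struct fsdata and get_fsdata() invented for illustration:

enum { HAS_READ = 1, HAS_WRITE = 2, HAS_LSEEK = 4 };

static unsigned int ops_to_methods(const struct file_operations *ops)
{
        unsigned int m = 0;

        if (ops->read)
                m |= HAS_READ;
        if (ops->write)
                m |= HAS_WRITE;
        if (ops->llseek)
                m |= HAS_LSEEK;
        return m;
}

static loff_t proxy_llseek(struct file *filp, loff_t off, int whence)
{
        struct fsdata *fsd = get_fsdata(filp);  /* hypothetical lookup */

        if (!(fsd->methods & HAS_LSEEK))
                return -ESPIPE;                 /* real fops never had llseek */
        return fsd->real_fops->llseek(filp, off, whence);
}
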
*/ - if (real_fops && real_fops->release) + if (real_fops->release) r = real_fops->release(inode, filp); - replace_fops(filp, d_inode(dentry)->i_fop); - kfree(proxy_fops); fops_put(real_fops); return r; } -static void __full_proxy_fops_init(struct file_operations *proxy_fops, - struct debugfs_fsdata *fsd) -{ - proxy_fops->release = full_proxy_release; - - if ((fsd->real_fops && fsd->real_fops->llseek) || - (fsd->short_fops && fsd->short_fops->llseek)) - proxy_fops->llseek = full_proxy_llseek; - - if ((fsd->real_fops && fsd->real_fops->read) || - (fsd->short_fops && fsd->short_fops->read)) - proxy_fops->read = full_proxy_read; - - if ((fsd->real_fops && fsd->real_fops->write) || - (fsd->short_fops && fsd->short_fops->write)) - proxy_fops->write = full_proxy_write; - - if (fsd->real_fops && fsd->real_fops->poll) - proxy_fops->poll = full_proxy_poll; - - if (fsd->real_fops && fsd->real_fops->unlocked_ioctl) - proxy_fops->unlocked_ioctl = full_proxy_unlocked_ioctl; -} - -static int full_proxy_open(struct inode *inode, struct file *filp, - enum dbgfs_get_mode mode) +static int full_proxy_open_regular(struct inode *inode, struct file *filp) { struct dentry *dentry = F_DENTRY(filp); const struct file_operations *real_fops; - struct file_operations *proxy_fops = NULL; struct debugfs_fsdata *fsd; int r; - r = __debugfs_file_get(dentry, mode); + r = __debugfs_file_get(dentry, DBGFS_GET_REGULAR); if (r) return r == -EIO ? -ENOENT : r; @@ -456,7 +454,7 @@ static int full_proxy_open(struct inode *inode, struct file *filp, if (r) goto out; - if (real_fops && !fops_get(real_fops)) { + if (!fops_get(real_fops)) { #ifdef CONFIG_MODULES if (real_fops->owner && real_fops->owner->state == MODULE_STATE_GOING) { @@ -472,55 +470,52 @@ static int full_proxy_open(struct inode *inode, struct file *filp, goto out; } - proxy_fops = kzalloc(sizeof(*proxy_fops), GFP_KERNEL); - if (!proxy_fops) { - r = -ENOMEM; - goto free_proxy; - } - __full_proxy_fops_init(proxy_fops, fsd); - replace_fops(filp, proxy_fops); - - if (!real_fops || real_fops->open) { - if (real_fops) - r = real_fops->open(inode, filp); - else - r = simple_open(inode, filp); + if (real_fops->open) { + r = real_fops->open(inode, filp); if (r) { - replace_fops(filp, d_inode(dentry)->i_fop); - goto free_proxy; - } else if (filp->f_op != proxy_fops) { + fops_put(real_fops); + } else if (filp->f_op != &debugfs_full_proxy_file_operations) { /* No protection against file removal anymore. */ WARN(1, "debugfs file owner replaced proxy fops: %pd", dentry); - goto free_proxy; + fops_put(real_fops); } } - - goto out; -free_proxy: - kfree(proxy_fops); - fops_put(real_fops); out: debugfs_file_put(dentry); return r; } -static int full_proxy_open_regular(struct inode *inode, struct file *filp) -{ - return full_proxy_open(inode, filp, DBGFS_GET_REGULAR); -} - const struct file_operations debugfs_full_proxy_file_operations = { .open = full_proxy_open_regular, + .release = full_proxy_release, + .llseek = full_proxy_llseek, + .read = full_proxy_read, + .write = full_proxy_write, + .poll = full_proxy_poll, + .unlocked_ioctl = full_proxy_unlocked_ioctl }; static int full_proxy_open_short(struct inode *inode, struct file *filp) { - return full_proxy_open(inode, filp, DBGFS_GET_SHORT); + struct dentry *dentry = F_DENTRY(filp); + int r; + + r = __debugfs_file_get(dentry, DBGFS_GET_SHORT); + if (r) + return r == -EIO ? 
-ENOENT : r; + r = debugfs_locked_down(inode, filp, NULL); + if (!r) + r = simple_open(inode, filp); + debugfs_file_put(dentry); + return r; } const struct file_operations debugfs_full_short_proxy_file_operations = { .open = full_proxy_open_short, + .llseek = full_proxy_llseek, + .read = full_proxy_read, + .write = full_proxy_write, }; ssize_t debugfs_attr_read(struct file *file, char __user *buf, diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index e752009de929..75715d8877ee 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -208,16 +208,34 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root) return 0; } +static struct kmem_cache *debugfs_inode_cachep __ro_after_init; + +static void init_once(void *foo) +{ + struct debugfs_inode_info *info = foo; + inode_init_once(&info->vfs_inode); +} + +static struct inode *debugfs_alloc_inode(struct super_block *sb) +{ + struct debugfs_inode_info *info; + info = alloc_inode_sb(sb, debugfs_inode_cachep, GFP_KERNEL); + if (!info) + return NULL; + return &info->vfs_inode; +} + static void debugfs_free_inode(struct inode *inode) { if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); - free_inode_nonrcu(inode); + kmem_cache_free(debugfs_inode_cachep, DEBUGFS_I(inode)); } static const struct super_operations debugfs_super_operations = { .statfs = simple_statfs, .show_options = debugfs_show_options, + .alloc_inode = debugfs_alloc_inode, .free_inode = debugfs_free_inode, }; @@ -225,23 +243,18 @@ static void debugfs_release_dentry(struct dentry *dentry) { struct debugfs_fsdata *fsd = dentry->d_fsdata; - if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) - return; - - /* check it wasn't a dir (no fsdata) or automount (no real_fops) */ - if (fsd && (fsd->real_fops || fsd->short_fops)) { + if (fsd) { WARN_ON(!list_empty(&fsd->cancellations)); mutex_destroy(&fsd->cancellations_mtx); } - kfree(fsd); } static struct vfsmount *debugfs_automount(struct path *path) { - struct debugfs_fsdata *fsd = path->dentry->d_fsdata; + struct inode *inode = path->dentry->d_inode; - return fsd->automount(path->dentry, d_inode(path->dentry)->i_private); + return DEBUGFS_I(inode)->automount(path->dentry, inode->i_private); } static const struct dentry_operations debugfs_dops = { @@ -411,6 +424,7 @@ static struct dentry *end_creating(struct dentry *dentry) static struct dentry *__debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, + const void *aux, const struct file_operations *proxy_fops, const void *real_fops) { @@ -441,9 +455,11 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, inode->i_private = data; inode->i_op = &debugfs_file_inode_operations; + if (!real_fops) + proxy_fops = &debugfs_noop_file_operations; inode->i_fop = proxy_fops; - dentry->d_fsdata = (void *)((unsigned long)real_fops | - DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); + DEBUGFS_I(inode)->raw = real_fops; + DEBUGFS_I(inode)->aux = aux; d_instantiate(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); @@ -452,30 +468,22 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, struct dentry *debugfs_create_file_full(const char *name, umode_t mode, struct dentry *parent, void *data, + const void *aux, const struct file_operations *fops) { - if (WARN_ON((unsigned long)fops & - DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) - return ERR_PTR(-EINVAL); - - return __debugfs_create_file(name, mode, parent, data, - fops ? 
&debugfs_full_proxy_file_operations : - &debugfs_noop_file_operations, + return __debugfs_create_file(name, mode, parent, data, aux, + &debugfs_full_proxy_file_operations, fops); } EXPORT_SYMBOL_GPL(debugfs_create_file_full); struct dentry *debugfs_create_file_short(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct debugfs_short_fops *fops) + struct dentry *parent, void *data, + const void *aux, + const struct debugfs_short_fops *fops) { - if (WARN_ON((unsigned long)fops & - DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) - return ERR_PTR(-EINVAL); - - return __debugfs_create_file(name, mode, parent, data, - fops ? &debugfs_full_short_proxy_file_operations : - &debugfs_noop_file_operations, + return __debugfs_create_file(name, mode, parent, data, aux, + &debugfs_full_short_proxy_file_operations, fops); } EXPORT_SYMBOL_GPL(debugfs_create_file_short); @@ -512,9 +520,8 @@ struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, const struct file_operations *fops) { - return __debugfs_create_file(name, mode, parent, data, - fops ? &debugfs_open_proxy_file_operations : - &debugfs_noop_file_operations, + return __debugfs_create_file(name, mode, parent, data, NULL, + &debugfs_open_proxy_file_operations, fops); } EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe); @@ -624,23 +631,13 @@ struct dentry *debugfs_create_automount(const char *name, void *data) { struct dentry *dentry = start_creating(name, parent); - struct debugfs_fsdata *fsd; struct inode *inode; if (IS_ERR(dentry)) return dentry; - fsd = kzalloc(sizeof(*fsd), GFP_KERNEL); - if (!fsd) { - failed_creating(dentry); - return ERR_PTR(-ENOMEM); - } - - fsd->automount = f; - if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); - kfree(fsd); return ERR_PTR(-EPERM); } @@ -648,14 +645,13 @@ struct dentry *debugfs_create_automount(const char *name, if (unlikely(!inode)) { pr_err("out of free dentries, can not create automount '%s'\n", name); - kfree(fsd); return failed_creating(dentry); } make_empty_dir_inode(inode); inode->i_flags |= S_AUTOMOUNT; inode->i_private = data; - dentry->d_fsdata = fsd; + DEBUGFS_I(inode)->automount = f; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); @@ -730,7 +726,7 @@ static void __debugfs_file_removed(struct dentry *dentry) */ smp_mb(); fsd = READ_ONCE(dentry->d_fsdata); - if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) + if (!fsd) return; /* if this was the last reference, we're done */ @@ -834,76 +830,70 @@ void debugfs_lookup_and_remove(const char *name, struct dentry *parent) EXPORT_SYMBOL_GPL(debugfs_lookup_and_remove); /** - * debugfs_rename - rename a file/directory in the debugfs filesystem - * @old_dir: a pointer to the parent dentry for the renamed object. This - * should be a directory dentry. - * @old_dentry: dentry of an object to be renamed. - * @new_dir: a pointer to the parent dentry where the object should be - * moved. This should be a directory dentry. - * @new_name: a pointer to a string containing the target name. + * debugfs_change_name - rename a file/directory in the debugfs filesystem + * @dentry: dentry of an object to be renamed. + * @fmt: format for new name * * This function renames a file/directory in debugfs. The target must not * exist for rename to succeed. * - * This function will return a pointer to old_dentry (which is updated to - * reflect renaming) if it succeeds. If an error occurs, ERR_PTR(-ERROR) - * will be returned. 
+ * This function will return 0 on success and -E... on failure. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. */ -struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, - struct dentry *new_dir, const char *new_name) +int __printf(2, 3) debugfs_change_name(struct dentry *dentry, const char *fmt, ...) { - int error; - struct dentry *dentry = NULL, *trap; + int error = 0; + const char *new_name; struct name_snapshot old_name; + struct dentry *parent, *target; + struct inode *dir; + va_list ap; - if (IS_ERR(old_dir)) - return old_dir; - if (IS_ERR(new_dir)) - return new_dir; - if (IS_ERR_OR_NULL(old_dentry)) - return old_dentry; - - trap = lock_rename(new_dir, old_dir); - /* Source or destination directories don't exist? */ - if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir)) - goto exit; - /* Source does not exist, cyclic rename, or mountpoint? */ - if (d_really_is_negative(old_dentry) || old_dentry == trap || - d_mountpoint(old_dentry)) - goto exit; - dentry = lookup_one_len(new_name, new_dir, strlen(new_name)); - /* Lookup failed, cyclic rename or target exists? */ - if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry)) - goto exit; - - take_dentry_name_snapshot(&old_name, old_dentry); - - error = simple_rename(&nop_mnt_idmap, d_inode(old_dir), old_dentry, - d_inode(new_dir), dentry, 0); - if (error) { - release_dentry_name_snapshot(&old_name); - goto exit; + if (IS_ERR_OR_NULL(dentry)) + return 0; + + va_start(ap, fmt); + new_name = kvasprintf_const(GFP_KERNEL, fmt, ap); + va_end(ap); + if (!new_name) + return -ENOMEM; + + parent = dget_parent(dentry); + dir = d_inode(parent); + inode_lock(dir); + + take_dentry_name_snapshot(&old_name, dentry); + + if (WARN_ON_ONCE(dentry->d_parent != parent)) { + error = -EINVAL; + goto out; } - d_move(old_dentry, dentry); - fsnotify_move(d_inode(old_dir), d_inode(new_dir), &old_name.name, - d_is_dir(old_dentry), - NULL, old_dentry); + if (strcmp(old_name.name.name, new_name) == 0) + goto out; + target = lookup_one_len(new_name, parent, strlen(new_name)); + if (IS_ERR(target)) { + error = PTR_ERR(target); + goto out; + } + if (d_really_is_positive(target)) { + dput(target); + error = -EINVAL; + goto out; + } + simple_rename_timestamp(dir, dentry, dir, target); + d_move(dentry, target); + dput(target); + fsnotify_move(dir, dir, &old_name.name, d_is_dir(dentry), NULL, dentry); +out: release_dentry_name_snapshot(&old_name); - unlock_rename(new_dir, old_dir); - dput(dentry); - return old_dentry; -exit: - if (dentry && !IS_ERR(dentry)) - dput(dentry); - unlock_rename(new_dir, old_dir); - if (IS_ERR(dentry)) - return dentry; - return ERR_PTR(-EINVAL); + inode_unlock(dir); + dput(parent); + kfree_const(new_name); + return error; } -EXPORT_SYMBOL_GPL(debugfs_rename); +EXPORT_SYMBOL_GPL(debugfs_change_name); /** * debugfs_initialized - Tells whether debugfs has been registered @@ -939,12 +929,22 @@ static int __init debugfs_init(void) if (retval) return retval; - retval = register_filesystem(&debug_fs_type); - if (retval) + debugfs_inode_cachep = kmem_cache_create("debugfs_inode_cache", + sizeof(struct debugfs_inode_info), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT, + init_once); + if (debugfs_inode_cachep == NULL) { sysfs_remove_mount_point(kernel_kobj, "debug"); - else - debugfs_registered = true; + return -ENOMEM; + } - return retval; + retval = register_filesystem(&debug_fs_type); + if (retval) { // Really not going to happen + sysfs_remove_mount_point(kernel_kobj, 
"debug"); + kmem_cache_destroy(debugfs_inode_cachep); + return retval; + } + debugfs_registered = true; + return 0; } core_initcall(debugfs_init); diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h index bbae4a228ef4..93483fe84425 100644 --- a/fs/debugfs/internal.h +++ b/fs/debugfs/internal.h @@ -11,6 +11,22 @@ struct file_operations; +struct debugfs_inode_info { + struct inode vfs_inode; + union { + const void *raw; + const struct file_operations *real_fops; + const struct debugfs_short_fops *short_fops; + debugfs_automount_t automount; + }; + const void *aux; +}; + +static inline struct debugfs_inode_info *DEBUGFS_I(struct inode *inode) +{ + return container_of(inode, struct debugfs_inode_info, vfs_inode); +} + /* declared over in file.c */ extern const struct file_operations debugfs_noop_file_operations; extern const struct file_operations debugfs_open_proxy_file_operations; @@ -20,29 +36,25 @@ extern const struct file_operations debugfs_full_short_proxy_file_operations; struct debugfs_fsdata { const struct file_operations *real_fops; const struct debugfs_short_fops *short_fops; - union { - /* automount_fn is used when real_fops is NULL */ - debugfs_automount_t automount; - struct { - refcount_t active_users; - struct completion active_users_drained; - - /* protect cancellations */ - struct mutex cancellations_mtx; - struct list_head cancellations; - }; + struct { + refcount_t active_users; + struct completion active_users_drained; + + /* protect cancellations */ + struct mutex cancellations_mtx; + struct list_head cancellations; + unsigned int methods; }; }; -/* - * A dentry's ->d_fsdata either points to the real fops or to a - * dynamically allocated debugfs_fsdata instance. - * In order to distinguish between these two cases, a real fops - * pointer gets its lowest bit set. - */ -#define DEBUGFS_FSDATA_IS_REAL_FOPS_BIT BIT(0) +enum { + HAS_READ = 1, + HAS_WRITE = 2, + HAS_LSEEK = 4, + HAS_POLL = 8, + HAS_IOCTL = 16 +}; -/* Access BITS */ #define DEBUGFS_ALLOW_API BIT(0) #define DEBUGFS_ALLOW_MOUNT BIT(1) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index b20e565b9c5e..1096ff8562fa 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -45,7 +45,7 @@ static int pty_limit_min; static int pty_limit_max = INT_MAX; static atomic_t pty_count = ATOMIC_INIT(0); -static struct ctl_table pty_table[] = { +static const struct ctl_table pty_table[] = { { .procname = "max", .maxlen = sizeof(int), diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index acaa0825e9bb..1dfd5b81d831 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -17,7 +17,9 @@ /** * ecryptfs_d_revalidate - revalidate an ecryptfs dentry - * @dentry: The ecryptfs dentry + * @dir: inode of expected parent + * @name: expected name + * @dentry: dentry to revalidate * @flags: lookup flags * * Called when the VFS needs to revalidate a dentry. This @@ -28,7 +30,8 @@ * Returns 1 if valid, 0 otherwise. 
* */ -static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) +static int ecryptfs_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); int rc = 1; @@ -36,8 +39,15 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) if (flags & LOOKUP_RCU) return -ECHILD; - if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE) - rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); + if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE) { + struct inode *lower_dir = ecryptfs_inode_to_lower(dir); + struct name_snapshot n; + + take_dentry_name_snapshot(&n, lower_dentry); + rc = lower_dentry->d_op->d_revalidate(lower_dir, &n.name, + lower_dentry, flags); + release_dentry_name_snapshot(&n); + } if (d_really_is_positive(dentry)) { struct inode *inode = d_inode(dentry); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index f9898e60dd8b..7c0980db77b3 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -318,7 +318,7 @@ static void unlist_file(struct epitems_head *head) static long long_zero; static long long_max = LONG_MAX; -static struct ctl_table epoll_table[] = { +static const struct ctl_table epoll_table[] = { { .procname = "max_user_watches", .data = &max_user_watches, diff --git a/fs/exec.c b/fs/exec.c index a49839174472..506cd411f4ac 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2159,7 +2159,7 @@ static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int writ return error; } -static struct ctl_table fs_exec_sysctls[] = { +static const struct ctl_table fs_exec_sysctls[] = { { .procname = "suid_dumpable", .data = &suid_dumpable, diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 099f80645072..691dd77b6ab5 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -31,10 +31,9 @@ static inline void exfat_d_version_set(struct dentry *dentry, * If it happened, the negative dentry isn't actually negative anymore. So, * drop it. 
*/ -static int exfat_d_revalidate(struct dentry *dentry, unsigned int flags) +static int exfat_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - int ret; - if (flags & LOOKUP_RCU) return -ECHILD; @@ -58,11 +57,7 @@ static int exfat_d_revalidate(struct dentry *dentry, unsigned int flags) if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; - spin_lock(&dentry->d_lock); - ret = inode_eq_iversion(d_inode(dentry->d_parent), - exfat_d_version(dentry)); - spin_unlock(&dentry->d_lock); - return ret; + return inode_eq_iversion(dir, exfat_d_version(dentry)); } /* returns the length of a struct qstr, ignoring trailing dots if necessary */ diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 26c4fc37edcf..da4263a14a20 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -322,9 +322,7 @@ restart: WARN_ON(!list_empty(&ei->i_fc_dilist)); spin_unlock(&sbi->s_fc_lock); - if (fc_dentry->fcd_name.name && - fc_dentry->fcd_name.len > DNAME_INLINE_LEN) - kfree(fc_dentry->fcd_name.name); + release_dentry_name_snapshot(&fc_dentry->fcd_name); kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); return; @@ -449,22 +447,7 @@ static int __track_dentry_update(handle_t *handle, struct inode *inode, node->fcd_op = dentry_update->op; node->fcd_parent = dir->i_ino; node->fcd_ino = inode->i_ino; - if (dentry->d_name.len > DNAME_INLINE_LEN) { - node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); - if (!node->fcd_name.name) { - kmem_cache_free(ext4_fc_dentry_cachep, node); - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, handle); - mutex_lock(&ei->i_fc_lock); - return -ENOMEM; - } - memcpy((u8 *)node->fcd_name.name, dentry->d_name.name, - dentry->d_name.len); - } else { - memcpy(node->fcd_iname, dentry->d_name.name, - dentry->d_name.len); - node->fcd_name.name = node->fcd_iname; - } - node->fcd_name.len = dentry->d_name.len; + take_dentry_name_snapshot(&node->fcd_name, dentry); INIT_LIST_HEAD(&node->fcd_dilist); spin_lock(&sbi->s_fc_lock); if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || @@ -832,7 +815,7 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, { struct ext4_fc_dentry_info fcd; struct ext4_fc_tl tl; - int dlen = fc_dentry->fcd_name.len; + int dlen = fc_dentry->fcd_name.name.len; u8 *dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc); @@ -847,7 +830,7 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, dst += EXT4_FC_TAG_BASE_LEN; memcpy(dst, &fcd, sizeof(fcd)); dst += sizeof(fcd); - memcpy(dst, fc_dentry->fcd_name.name, dlen); + memcpy(dst, fc_dentry->fcd_name.name.name, dlen); return true; } @@ -1328,9 +1311,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) list_del_init(&fc_dentry->fcd_dilist); spin_unlock(&sbi->s_fc_lock); - if (fc_dentry->fcd_name.name && - fc_dentry->fcd_name.len > DNAME_INLINE_LEN) - kfree(fc_dentry->fcd_name.name); + release_dentry_name_snapshot(&fc_dentry->fcd_name); kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); spin_lock(&sbi->s_fc_lock); } diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 2fadb2c4780c..3bd534e4dbbf 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -109,8 +109,7 @@ struct ext4_fc_dentry_update { int fcd_op; /* Type of update create / unlink / link */ int fcd_parent; /* Parent inode number */ int fcd_ino; /* Inode number */ - struct qstr fcd_name; /* Dirent name */ - unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */ + 
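The ext4 fast-commit change above mirrors the ecryptfs one: instead of hand-copying dentry names and special-casing DNAME_INLINE_LEN, a struct name_snapshot pins the name (taking a reference on an external name, or copying a short one), so it remains usable even after the dentry is renamed, and one release call drops it. A hedged usage sketch; struct pending_op is invented:

struct pending_op {
        struct name_snapshot    name;
        struct list_head        list;
};

/* at record time: pin the dentry's current name */
take_dentry_name_snapshot(&op->name, dentry);

/* later, possibly after the dentry was renamed or evicted */
pr_debug("replaying op for %.*s\n",
         (int)op->name.name.len, op->name.name.name);
release_dentry_name_snapshot(&op->name);
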
struct name_snapshot fcd_name; /* Dirent name */ struct list_head fcd_list; struct list_head fcd_dilist; }; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 15bf32c21ac0..926c26e90ef8 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -43,17 +43,13 @@ static inline void vfat_d_version_set(struct dentry *dentry, * If it happened, the negative dentry isn't actually negative * anymore. So, drop it. */ -static int vfat_revalidate_shortname(struct dentry *dentry) +static bool vfat_revalidate_shortname(struct dentry *dentry, struct inode *dir) { - int ret = 1; - spin_lock(&dentry->d_lock); - if (!inode_eq_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry))) - ret = 0; - spin_unlock(&dentry->d_lock); - return ret; + return inode_eq_iversion(dir, vfat_d_version(dentry)); } -static int vfat_revalidate(struct dentry *dentry, unsigned int flags) +static int vfat_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; @@ -61,10 +57,11 @@ static int vfat_revalidate(struct dentry *dentry, unsigned int flags) /* This is not negative dentry. Always valid. */ if (d_really_is_positive(dentry)) return 1; - return vfat_revalidate_shortname(dentry); + return vfat_revalidate_shortname(dentry, dir); } -static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags) +static int vfat_revalidate_ci(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; @@ -97,7 +94,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags) if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; - return vfat_revalidate_shortname(dentry); + return vfat_revalidate_shortname(dentry, dir); } /* returns the length of a struct qstr, ignoring trailing dots */ diff --git a/fs/file_table.c b/fs/file_table.c index a32171d2b83f..7f7c378c6e31 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -106,7 +106,7 @@ static int proc_nr_files(const struct ctl_table *table, int write, void *buffer, return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } -static struct ctl_table fs_stat_sysctls[] = { +static const struct ctl_table fs_stat_sysctls[] = { { .procname = "file-nr", .data = &files_stat, diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index 8674dbfbe59d..ca215a3cba3e 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -63,3 +63,15 @@ config FUSE_PASSTHROUGH to be performed directly on a backing file. If you want to allow passthrough operations, answer Y. + +config FUSE_IO_URING + bool "FUSE communication over io-uring" + default y + depends on FUSE_FS + depends on IO_URING + help + This allows sending FUSE requests over the io-uring interface and + also adds request core affinity. 
+ + If you want to allow fuse server/client communication through io-uring, + answer Y diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 2c372180d631..3f0f312a31c1 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -15,5 +15,6 @@ fuse-y += iomode.o fuse-$(CONFIG_FUSE_DAX) += dax.o fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o fuse-$(CONFIG_SYSCTL) += sysctl.o +fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o virtiofs-y := virtio_fs.o diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 9abbc2f2894f..0b6ee6dd1fd6 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -240,11 +240,12 @@ static int fuse_send_removemapping(struct inode *inode, args.opcode = FUSE_REMOVEMAPPING; args.nodeid = fi->nodeid; - args.in_numargs = 2; - args.in_args[0].size = sizeof(*inargp); - args.in_args[0].value = inargp; - args.in_args[1].size = inargp->count * sizeof(*remove_one); - args.in_args[1].value = remove_one; + args.in_numargs = 3; + fuse_set_zero_arg0(&args); + args.in_args[1].size = sizeof(*inargp); + args.in_args[1].value = inargp; + args.in_args[2].size = inargp->count * sizeof(*remove_one); + args.in_args[2].value = remove_one; return fuse_simple_request(fm, &args); } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 27ccae63495d..5b5f789b37eb 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -6,7 +6,9 @@ See the file COPYING. */ +#include "dev_uring_i.h" #include "fuse_i.h" +#include "fuse_dev_i.h" #include <linux/init.h> #include <linux/module.h> @@ -28,23 +30,8 @@ MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS("devname:fuse"); -/* Ordinary requests have even IDs, while interrupts IDs are odd */ -#define FUSE_INT_REQ_BIT (1ULL << 0) -#define FUSE_REQ_ID_STEP (1ULL << 1) - static struct kmem_cache *fuse_req_cachep; -static void end_requests(struct list_head *head); - -static struct fuse_dev *fuse_get_dev(struct file *file) -{ - /* - * Lockless access is OK, because file->private data is set - * once during mount and is valid until the file is released. 
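Several dev.c helpers lose their static here and in the following hunks (fuse_req_hash(), fuse_request_find(), fuse_copy_init(), fuse_dev_end_requests()) so the new io-uring transport can reuse them. The request-ID convention from the comment removed above still holds: ordinary requests get even unique IDs, the matching interrupt uses the odd one, and hashing masks the low bit so both resolve to the same processing bucket. Sketch using the constants visible in the removed lines:

#define FUSE_INT_REQ_BIT        (1ULL << 0)
#define FUSE_REQ_ID_STEP        (1ULL << 1)

u64 req_id  = fuse_get_unique(fiq);             /* even */
u64 intr_id = req_id | FUSE_INT_REQ_BIT;        /* odd, same request */

/* both IDs hash to the same processing-queue bucket */
unsigned int bucket = hash_long(req_id & ~FUSE_INT_REQ_BIT,
                                FUSE_PQ_HASH_BITS);
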
- */ - return READ_ONCE(file->private_data); -} - static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) { INIT_LIST_HEAD(&req->list); @@ -89,7 +76,8 @@ void fuse_set_initialized(struct fuse_conn *fc) static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background) { - return !fc->initialized || (for_background && fc->blocked); + return !fc->initialized || (for_background && fc->blocked) || + (fc->io_uring && !fuse_uring_ready(fc)); } static void fuse_drop_waiting(struct fuse_conn *fc) @@ -234,7 +222,7 @@ u64 fuse_get_unique(struct fuse_iqueue *fiq) } EXPORT_SYMBOL_GPL(fuse_get_unique); -static unsigned int fuse_req_hash(u64 unique) +unsigned int fuse_req_hash(u64 unique) { return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS); } @@ -250,7 +238,8 @@ __releases(fiq->lock) spin_unlock(&fiq->lock); } -static void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget) +void fuse_dev_queue_forget(struct fuse_iqueue *fiq, + struct fuse_forget_link *forget) { spin_lock(&fiq->lock); if (fiq->connected) { @@ -263,7 +252,7 @@ static void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_li } } -static void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) +void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { spin_lock(&fiq->lock); if (list_empty(&req->intr_entry)) { @@ -580,7 +569,25 @@ ssize_t __fuse_simple_request(struct mnt_idmap *idmap, return ret; } -static bool fuse_request_queue_background(struct fuse_req *req) +#ifdef CONFIG_FUSE_IO_URING +static bool fuse_request_queue_background_uring(struct fuse_conn *fc, + struct fuse_req *req) +{ + struct fuse_iqueue *fiq = &fc->iq; + + req->in.h.unique = fuse_get_unique(fiq); + req->in.h.len = sizeof(struct fuse_in_header) + + fuse_len_args(req->args->in_numargs, + (struct fuse_arg *) req->args->in_args); + + return fuse_uring_queue_bq_req(req); +} +#endif + +/* + * @return true if queued + */ +static int fuse_request_queue_background(struct fuse_req *req) { struct fuse_mount *fm = req->fm; struct fuse_conn *fc = fm->fc; @@ -592,6 +599,12 @@ static bool fuse_request_queue_background(struct fuse_req *req) atomic_inc(&fc->num_waiting); } __set_bit(FR_ISREPLY, &req->flags); + +#ifdef CONFIG_FUSE_IO_URING + if (fuse_uring_ready(fc)) + return fuse_request_queue_background_uring(fc, req); +#endif + spin_lock(&fc->bg_lock); if (likely(fc->connected)) { fc->num_background++; @@ -692,22 +705,8 @@ static int unlock_request(struct fuse_req *req) return err; } -struct fuse_copy_state { - int write; - struct fuse_req *req; - struct iov_iter *iter; - struct pipe_buffer *pipebufs; - struct pipe_buffer *currbuf; - struct pipe_inode_info *pipe; - unsigned long nr_segs; - struct page *pg; - unsigned len; - unsigned offset; - unsigned move_pages:1; -}; - -static void fuse_copy_init(struct fuse_copy_state *cs, int write, - struct iov_iter *iter) +void fuse_copy_init(struct fuse_copy_state *cs, int write, + struct iov_iter *iter) { memset(cs, 0, sizeof(*cs)); cs->write = write; @@ -814,6 +813,9 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) *size -= ncpy; cs->len -= ncpy; cs->offset += ncpy; + if (cs->is_uring) + cs->ring.copied_sz += ncpy; + return ncpy; } @@ -1068,9 +1070,9 @@ static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) } /* Copy request arguments to/from userspace buffer */ -static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, - unsigned argpages, struct 
fuse_arg *args, - int zeroing) +int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, + unsigned argpages, struct fuse_arg *args, + int zeroing) { int err = 0; unsigned i; @@ -1760,7 +1762,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, args = &ap->args; args->nodeid = outarg->nodeid; args->opcode = FUSE_NOTIFY_REPLY; - args->in_numargs = 2; + args->in_numargs = 3; args->in_pages = true; args->end = fuse_retrieve_end; @@ -1788,9 +1790,10 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, } ra->inarg.offset = outarg->offset; ra->inarg.size = total_len; - args->in_args[0].size = sizeof(ra->inarg); - args->in_args[0].value = &ra->inarg; - args->in_args[1].size = total_len; + fuse_set_zero_arg0(args); + args->in_args[1].size = sizeof(ra->inarg); + args->in_args[1].value = &ra->inarg; + args->in_args[2].size = total_len; err = fuse_simple_notify_reply(fm, args, outarg->notify_unique); if (err) @@ -1885,7 +1888,7 @@ static void fuse_resend(struct fuse_conn *fc) spin_unlock(&fiq->lock); list_for_each_entry(req, &to_queue, list) clear_bit(FR_PENDING, &req->flags); - end_requests(&to_queue); + fuse_dev_end_requests(&to_queue); return; } /* iq and pq requests are both oldest to newest */ @@ -1934,7 +1937,7 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, } /* Look up request on processing list by unique ID */ -static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique) +struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique) { unsigned int hash = fuse_req_hash(unique); struct fuse_req *req; @@ -1946,10 +1949,17 @@ static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique) return NULL; } -static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args, - unsigned nbytes) +int fuse_copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args, + unsigned nbytes) { - unsigned reqsize = sizeof(struct fuse_out_header); + + unsigned int reqsize = 0; + + /* + * Uring has all headers separated from args - args is payload only + */ + if (!cs->is_uring) + reqsize = sizeof(struct fuse_out_header); reqsize += fuse_len_args(args->out_numargs, args->out_args); @@ -2011,7 +2021,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, spin_lock(&fpq->lock); req = NULL; if (fpq->connected) - req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT); + req = fuse_request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT); err = -ENOENT; if (!req) { @@ -2049,7 +2059,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, if (oh.error) err = nbytes != sizeof(oh) ? 
-EINVAL : 0; else - err = copy_out_args(cs, req->args, nbytes); + err = fuse_copy_out_args(cs, req->args, nbytes); fuse_copy_finish(cs); spin_lock(&fpq->lock); @@ -2204,7 +2214,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait) } /* Abort all requests on the given list (pending or processing) */ -static void end_requests(struct list_head *head) +void fuse_dev_end_requests(struct list_head *head) { while (!list_empty(head)) { struct fuse_req *req; @@ -2307,7 +2317,13 @@ void fuse_abort_conn(struct fuse_conn *fc) wake_up_all(&fc->blocked_waitq); spin_unlock(&fc->lock); - end_requests(&to_end); + fuse_dev_end_requests(&to_end); + + /* + * fc->lock must not be taken to avoid conflicts with io-uring + * locks + */ + fuse_uring_abort(fc); } else { spin_unlock(&fc->lock); } @@ -2319,6 +2335,8 @@ void fuse_wait_aborted(struct fuse_conn *fc) /* matches implicit memory barrier in fuse_drop_waiting() */ smp_mb(); wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0); + + fuse_uring_wait_stopped_queues(fc); } int fuse_dev_release(struct inode *inode, struct file *file) @@ -2337,7 +2355,7 @@ int fuse_dev_release(struct inode *inode, struct file *file) list_splice_init(&fpq->processing[i], &to_end); spin_unlock(&fpq->lock); - end_requests(&to_end); + fuse_dev_end_requests(&to_end); /* Are we the last open device? */ if (atomic_dec_and_test(&fc->dev_count)) { @@ -2475,6 +2493,9 @@ const struct file_operations fuse_dev_operations = { .fasync = fuse_dev_fasync, .unlocked_ioctl = fuse_dev_ioctl, .compat_ioctl = compat_ptr_ioctl, +#ifdef CONFIG_FUSE_IO_URING + .uring_cmd = fuse_uring_cmd, +#endif }; EXPORT_SYMBOL_GPL(fuse_dev_operations); diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c new file mode 100644 index 000000000000..ebd2931b4f2a --- /dev/null +++ b/fs/fuse/dev_uring.c @@ -0,0 +1,1319 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * FUSE: Filesystem in Userspace + * Copyright (c) 2023-2024 DataDirect Networks. + */ + +#include "fuse_i.h" +#include "dev_uring_i.h" +#include "fuse_dev_i.h" + +#include <linux/fs.h> +#include <linux/io_uring/cmd.h> + +static bool __read_mostly enable_uring; +module_param(enable_uring, bool, 0644); +MODULE_PARM_DESC(enable_uring, + "Enable userspace communication through io-uring"); + +#define FUSE_URING_IOV_SEGS 2 /* header and payload */ + + +bool fuse_uring_enabled(void) +{ + return enable_uring; +} + +struct fuse_uring_pdu { + struct fuse_ring_ent *ent; +}; + +static const struct fuse_iqueue_ops fuse_io_uring_ops; + +static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd, + struct fuse_ring_ent *ring_ent) +{ + struct fuse_uring_pdu *pdu = + io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu); + + pdu->ent = ring_ent; +} + +static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd) +{ + struct fuse_uring_pdu *pdu = + io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu); + + return pdu->ent; +} + +static void fuse_uring_flush_bg(struct fuse_ring_queue *queue) +{ + struct fuse_ring *ring = queue->ring; + struct fuse_conn *fc = ring->fc; + + lockdep_assert_held(&queue->lock); + lockdep_assert_held(&fc->bg_lock); + + /* + * Allow one bg request per queue, ignoring global fc limits. + * This prevents a single queue from consuming all resources and + * eliminates the need for remote queue wake-ups when global + * limits are met but this queue has no more waiting requests. 
+ */ + while ((fc->active_background < fc->max_background || + !queue->active_background) && + (!list_empty(&queue->fuse_req_bg_queue))) { + struct fuse_req *req; + + req = list_first_entry(&queue->fuse_req_bg_queue, + struct fuse_req, list); + fc->active_background++; + queue->active_background++; + + list_move_tail(&req->list, &queue->fuse_req_queue); + } +} + +static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req, + int error) +{ + struct fuse_ring_queue *queue = ent->queue; + struct fuse_ring *ring = queue->ring; + struct fuse_conn *fc = ring->fc; + + lockdep_assert_not_held(&queue->lock); + spin_lock(&queue->lock); + ent->fuse_req = NULL; + if (test_bit(FR_BACKGROUND, &req->flags)) { + queue->active_background--; + spin_lock(&fc->bg_lock); + fuse_uring_flush_bg(queue); + spin_unlock(&fc->bg_lock); + } + + spin_unlock(&queue->lock); + + if (error) + req->out.h.error = error; + + clear_bit(FR_SENT, &req->flags); + fuse_request_end(req); +} + +/* Abort all list queued request on the given ring queue */ +static void fuse_uring_abort_end_queue_requests(struct fuse_ring_queue *queue) +{ + struct fuse_req *req; + LIST_HEAD(req_list); + + spin_lock(&queue->lock); + list_for_each_entry(req, &queue->fuse_req_queue, list) + clear_bit(FR_PENDING, &req->flags); + list_splice_init(&queue->fuse_req_queue, &req_list); + spin_unlock(&queue->lock); + + /* must not hold queue lock to avoid order issues with fi->lock */ + fuse_dev_end_requests(&req_list); +} + +void fuse_uring_abort_end_requests(struct fuse_ring *ring) +{ + int qid; + struct fuse_ring_queue *queue; + struct fuse_conn *fc = ring->fc; + + for (qid = 0; qid < ring->nr_queues; qid++) { + queue = READ_ONCE(ring->queues[qid]); + if (!queue) + continue; + + queue->stopped = true; + + WARN_ON_ONCE(ring->fc->max_background != UINT_MAX); + spin_lock(&queue->lock); + spin_lock(&fc->bg_lock); + fuse_uring_flush_bg(queue); + spin_unlock(&fc->bg_lock); + spin_unlock(&queue->lock); + fuse_uring_abort_end_queue_requests(queue); + } +} + +void fuse_uring_destruct(struct fuse_conn *fc) +{ + struct fuse_ring *ring = fc->ring; + int qid; + + if (!ring) + return; + + for (qid = 0; qid < ring->nr_queues; qid++) { + struct fuse_ring_queue *queue = ring->queues[qid]; + struct fuse_ring_ent *ent, *next; + + if (!queue) + continue; + + WARN_ON(!list_empty(&queue->ent_avail_queue)); + WARN_ON(!list_empty(&queue->ent_w_req_queue)); + WARN_ON(!list_empty(&queue->ent_commit_queue)); + WARN_ON(!list_empty(&queue->ent_in_userspace)); + + list_for_each_entry_safe(ent, next, &queue->ent_released, + list) { + list_del_init(&ent->list); + kfree(ent); + } + + kfree(queue->fpq.processing); + kfree(queue); + ring->queues[qid] = NULL; + } + + kfree(ring->queues); + kfree(ring); + fc->ring = NULL; +} + +/* + * Basic ring setup for this connection based on the provided configuration + */ +static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc) +{ + struct fuse_ring *ring; + size_t nr_queues = num_possible_cpus(); + struct fuse_ring *res = NULL; + size_t max_payload_size; + + ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT); + if (!ring) + return NULL; + + ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *), + GFP_KERNEL_ACCOUNT); + if (!ring->queues) + goto out_err; + + max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write); + max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE); + + spin_lock(&fc->lock); + if (fc->ring) { + /* race, another thread created the ring in the meantime */ + 
spin_unlock(&fc->lock); + res = fc->ring; + goto out_err; + } + + init_waitqueue_head(&ring->stop_waitq); + + fc->ring = ring; + ring->nr_queues = nr_queues; + ring->fc = fc; + ring->max_payload_sz = max_payload_size; + atomic_set(&ring->queue_refs, 0); + + spin_unlock(&fc->lock); + return ring; + +out_err: + kfree(ring->queues); + kfree(ring); + return res; +} + +static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring, + int qid) +{ + struct fuse_conn *fc = ring->fc; + struct fuse_ring_queue *queue; + struct list_head *pq; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT); + if (!queue) + return NULL; + pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); + if (!pq) { + kfree(queue); + return NULL; + } + + queue->qid = qid; + queue->ring = ring; + spin_lock_init(&queue->lock); + + INIT_LIST_HEAD(&queue->ent_avail_queue); + INIT_LIST_HEAD(&queue->ent_commit_queue); + INIT_LIST_HEAD(&queue->ent_w_req_queue); + INIT_LIST_HEAD(&queue->ent_in_userspace); + INIT_LIST_HEAD(&queue->fuse_req_queue); + INIT_LIST_HEAD(&queue->fuse_req_bg_queue); + INIT_LIST_HEAD(&queue->ent_released); + + queue->fpq.processing = pq; + fuse_pqueue_init(&queue->fpq); + + spin_lock(&fc->lock); + if (ring->queues[qid]) { + spin_unlock(&fc->lock); + kfree(queue->fpq.processing); + kfree(queue); + return ring->queues[qid]; + } + + /* + * write_once and lock as the caller mostly doesn't take the lock at all + */ + WRITE_ONCE(ring->queues[qid], queue); + spin_unlock(&fc->lock); + + return queue; +} + +static void fuse_uring_stop_fuse_req_end(struct fuse_req *req) +{ + clear_bit(FR_SENT, &req->flags); + req->out.h.error = -ECONNABORTED; + fuse_request_end(req); +} + +/* + * Release a request/entry on connection tear down + */ +static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent) +{ + struct fuse_req *req; + struct io_uring_cmd *cmd; + + struct fuse_ring_queue *queue = ent->queue; + + spin_lock(&queue->lock); + cmd = ent->cmd; + ent->cmd = NULL; + req = ent->fuse_req; + ent->fuse_req = NULL; + if (req) { + /* remove entry from queue->fpq->processing */ + list_del_init(&req->list); + } + + /* + * The entry must not be freed immediately, due to access of direct + * pointer access of entries through IO_URING_F_CANCEL - there is a risk + * of race between daemon termination (which triggers IO_URING_F_CANCEL + * and accesses entries without checking the list state first + */ + list_move(&ent->list, &queue->ent_released); + ent->state = FRRS_RELEASED; + spin_unlock(&queue->lock); + + if (cmd) + io_uring_cmd_done(cmd, -ENOTCONN, 0, IO_URING_F_UNLOCKED); + + if (req) + fuse_uring_stop_fuse_req_end(req); +} + +static void fuse_uring_stop_list_entries(struct list_head *head, + struct fuse_ring_queue *queue, + enum fuse_ring_req_state exp_state) +{ + struct fuse_ring *ring = queue->ring; + struct fuse_ring_ent *ent, *next; + ssize_t queue_refs = SSIZE_MAX; + LIST_HEAD(to_teardown); + + spin_lock(&queue->lock); + list_for_each_entry_safe(ent, next, head, list) { + if (ent->state != exp_state) { + pr_warn("entry teardown qid=%d state=%d expected=%d", + queue->qid, ent->state, exp_state); + continue; + } + + ent->state = FRRS_TEARDOWN; + list_move(&ent->list, &to_teardown); + } + spin_unlock(&queue->lock); + + /* no queue lock to avoid lock order issues */ + list_for_each_entry_safe(ent, next, &to_teardown, list) { + fuse_uring_entry_teardown(ent); + queue_refs = atomic_dec_return(&ring->queue_refs); + WARN_ON_ONCE(queue_refs < 0); + } +} + +static void 
fuse_uring_teardown_entries(struct fuse_ring_queue *queue) +{ + fuse_uring_stop_list_entries(&queue->ent_in_userspace, queue, + FRRS_USERSPACE); + fuse_uring_stop_list_entries(&queue->ent_avail_queue, queue, + FRRS_AVAILABLE); +} + +/* + * Log state debug info + */ +static void fuse_uring_log_ent_state(struct fuse_ring *ring) +{ + int qid; + struct fuse_ring_ent *ent; + + for (qid = 0; qid < ring->nr_queues; qid++) { + struct fuse_ring_queue *queue = ring->queues[qid]; + + if (!queue) + continue; + + spin_lock(&queue->lock); + /* + * Log entries from the intermediate queue, the other queues + * should be empty + */ + list_for_each_entry(ent, &queue->ent_w_req_queue, list) { + pr_info(" ent-req-queue ring=%p qid=%d ent=%p state=%d\n", + ring, qid, ent, ent->state); + } + list_for_each_entry(ent, &queue->ent_commit_queue, list) { + pr_info(" ent-commit-queue ring=%p qid=%d ent=%p state=%d\n", + ring, qid, ent, ent->state); + } + spin_unlock(&queue->lock); + } + ring->stop_debug_log = 1; +} + +static void fuse_uring_async_stop_queues(struct work_struct *work) +{ + int qid; + struct fuse_ring *ring = + container_of(work, struct fuse_ring, async_teardown_work.work); + + /* XXX code dup */ + for (qid = 0; qid < ring->nr_queues; qid++) { + struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]); + + if (!queue) + continue; + + fuse_uring_teardown_entries(queue); + } + + /* + * Some ring entries might be in the middle of IO operations, + * i.e. in process to get handled by file_operations::uring_cmd + * or on the way to userspace - we could handle that with conditions in + * run time code, but easier/cleaner to have an async tear down handler + * If there are still queue references left + */ + if (atomic_read(&ring->queue_refs) > 0) { + if (time_after(jiffies, + ring->teardown_time + FUSE_URING_TEARDOWN_TIMEOUT)) + fuse_uring_log_ent_state(ring); + + schedule_delayed_work(&ring->async_teardown_work, + FUSE_URING_TEARDOWN_INTERVAL); + } else { + wake_up_all(&ring->stop_waitq); + } +} + +/* + * Stop the ring queues + */ +void fuse_uring_stop_queues(struct fuse_ring *ring) +{ + int qid; + + for (qid = 0; qid < ring->nr_queues; qid++) { + struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]); + + if (!queue) + continue; + + fuse_uring_teardown_entries(queue); + } + + if (atomic_read(&ring->queue_refs) > 0) { + ring->teardown_time = jiffies; + INIT_DELAYED_WORK(&ring->async_teardown_work, + fuse_uring_async_stop_queues); + schedule_delayed_work(&ring->async_teardown_work, + FUSE_URING_TEARDOWN_INTERVAL); + } else { + wake_up_all(&ring->stop_waitq); + } +} + +/* + * Handle IO_URING_F_CANCEL, typically should come on daemon termination. 
+ * + * Releasing the last entry should trigger fuse_dev_release() if + * the daemon was terminated + */ +static void fuse_uring_cancel(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd); + struct fuse_ring_queue *queue; + bool need_cmd_done = false; + + /* + * direct access on ent - it must not be destructed as long as + * IO_URING_F_CANCEL might come up + */ + queue = ent->queue; + spin_lock(&queue->lock); + if (ent->state == FRRS_AVAILABLE) { + ent->state = FRRS_USERSPACE; + list_move(&ent->list, &queue->ent_in_userspace); + need_cmd_done = true; + ent->cmd = NULL; + } + spin_unlock(&queue->lock); + + if (need_cmd_done) { + /* no queue lock to avoid lock order issues */ + io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags); + } +} + +static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags, + struct fuse_ring_ent *ring_ent) +{ + uring_cmd_set_ring_ent(cmd, ring_ent); + io_uring_cmd_mark_cancelable(cmd, issue_flags); +} + +/* + * Checks for errors and stores it into the request + */ +static int fuse_uring_out_header_has_err(struct fuse_out_header *oh, + struct fuse_req *req, + struct fuse_conn *fc) +{ + int err; + + err = -EINVAL; + if (oh->unique == 0) { + /* Not supported through io-uring yet */ + pr_warn_once("notify through fuse-io-uring not supported\n"); + goto err; + } + + if (oh->error <= -ERESTARTSYS || oh->error > 0) + goto err; + + if (oh->error) { + err = oh->error; + goto err; + } + + err = -ENOENT; + if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) { + pr_warn_ratelimited("unique mismatch, expected: %llu got %llu\n", + req->in.h.unique, + oh->unique & ~FUSE_INT_REQ_BIT); + goto err; + } + + /* + * Is it an interrupt reply ID? + * XXX: Not supported through fuse-io-uring yet, it should not even + * find the request - should not happen. + */ + WARN_ON_ONCE(oh->unique & FUSE_INT_REQ_BIT); + + err = 0; +err: + return err; +} + +static int fuse_uring_copy_from_ring(struct fuse_ring *ring, + struct fuse_req *req, + struct fuse_ring_ent *ent) +{ + struct fuse_copy_state cs; + struct fuse_args *args = req->args; + struct iov_iter iter; + int err; + struct fuse_uring_ent_in_out ring_in_out; + + err = copy_from_user(&ring_in_out, &ent->headers->ring_ent_in_out, + sizeof(ring_in_out)); + if (err) + return -EFAULT; + + err = import_ubuf(ITER_SOURCE, ent->payload, ring->max_payload_sz, + &iter); + if (err) + return err; + + fuse_copy_init(&cs, 0, &iter); + cs.is_uring = 1; + cs.req = req; + + return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz); +} + + /* + * Copy data from the req to the ring buffer + */ +static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req, + struct fuse_ring_ent *ent) +{ + struct fuse_copy_state cs; + struct fuse_args *args = req->args; + struct fuse_in_arg *in_args = args->in_args; + int num_args = args->in_numargs; + int err; + struct iov_iter iter; + struct fuse_uring_ent_in_out ent_in_out = { + .flags = 0, + .commit_id = req->in.h.unique, + }; + + err = import_ubuf(ITER_DEST, ent->payload, ring->max_payload_sz, &iter); + if (err) { + pr_info_ratelimited("fuse: Import of user buffer failed\n"); + return err; + } + + fuse_copy_init(&cs, 1, &iter); + cs.is_uring = 1; + cs.req = req; + + if (num_args > 0) { + /* + * Expectation is that the first argument is the per op header. + * Some op code have that as zero size. 
+ */ + if (args->in_args[0].size > 0) { + err = copy_to_user(&ent->headers->op_in, in_args->value, + in_args->size); + if (err) { + pr_info_ratelimited( + "Copying the header failed.\n"); + return -EFAULT; + } + } + in_args++; + num_args--; + } + + /* copy the payload */ + err = fuse_copy_args(&cs, num_args, args->in_pages, + (struct fuse_arg *)in_args, 0); + if (err) { + pr_info_ratelimited("%s fuse_copy_args failed\n", __func__); + return err; + } + + ent_in_out.payload_sz = cs.ring.copied_sz; + err = copy_to_user(&ent->headers->ring_ent_in_out, &ent_in_out, + sizeof(ent_in_out)); + return err ? -EFAULT : 0; +} + +static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent, + struct fuse_req *req) +{ + struct fuse_ring_queue *queue = ent->queue; + struct fuse_ring *ring = queue->ring; + int err; + + err = -EIO; + if (WARN_ON(ent->state != FRRS_FUSE_REQ)) { + pr_err("qid=%d ring-req=%p invalid state %d on send\n", + queue->qid, ent, ent->state); + return err; + } + + err = -EINVAL; + if (WARN_ON(req->in.h.unique == 0)) + return err; + + /* copy the request */ + err = fuse_uring_args_to_ring(ring, req, ent); + if (unlikely(err)) { + pr_info_ratelimited("Copy to ring failed: %d\n", err); + return err; + } + + /* copy fuse_in_header */ + err = copy_to_user(&ent->headers->in_out, &req->in.h, + sizeof(req->in.h)); + if (err) { + err = -EFAULT; + return err; + } + + return 0; +} + +static int fuse_uring_prepare_send(struct fuse_ring_ent *ent, + struct fuse_req *req) +{ + int err; + + err = fuse_uring_copy_to_ring(ent, req); + if (!err) + set_bit(FR_SENT, &req->flags); + else + fuse_uring_req_end(ent, req, err); + + return err; +} + +/* + * Write data to the ring buffer and send the request to userspace, + * userspace will read it + * This is comparable with classical read(/dev/fuse) + */ +static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent, + struct fuse_req *req, + unsigned int issue_flags) +{ + struct fuse_ring_queue *queue = ent->queue; + int err; + struct io_uring_cmd *cmd; + + err = fuse_uring_prepare_send(ent, req); + if (err) + return err; + + spin_lock(&queue->lock); + cmd = ent->cmd; + ent->cmd = NULL; + ent->state = FRRS_USERSPACE; + list_move(&ent->list, &queue->ent_in_userspace); + spin_unlock(&queue->lock); + + io_uring_cmd_done(cmd, 0, 0, issue_flags); + return 0; +} + +/* + * Make a ring entry available for fuse_req assignment + */ +static void fuse_uring_ent_avail(struct fuse_ring_ent *ent, + struct fuse_ring_queue *queue) +{ + WARN_ON_ONCE(!ent->cmd); + list_move(&ent->list, &queue->ent_avail_queue); + ent->state = FRRS_AVAILABLE; +} + +/* Used to find the request on SQE commit */ +static void fuse_uring_add_to_pq(struct fuse_ring_ent *ent, + struct fuse_req *req) +{ + struct fuse_ring_queue *queue = ent->queue; + struct fuse_pqueue *fpq = &queue->fpq; + unsigned int hash; + + req->ring_entry = ent; + hash = fuse_req_hash(req->in.h.unique); + list_move_tail(&req->list, &fpq->processing[hash]); +} + +/* + * Assign a fuse queue entry to the given entry + */ +static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent, + struct fuse_req *req) +{ + struct fuse_ring_queue *queue = ent->queue; + struct fuse_conn *fc = req->fm->fc; + struct fuse_iqueue *fiq = &fc->iq; + + lockdep_assert_held(&queue->lock); + + if (WARN_ON_ONCE(ent->state != FRRS_AVAILABLE && + ent->state != FRRS_COMMIT)) { + pr_warn("%s qid=%d state=%d\n", __func__, ent->queue->qid, + ent->state); + } + + spin_lock(&fiq->lock); + clear_bit(FR_PENDING, &req->flags); + spin_unlock(&fiq->lock); + 
ent->fuse_req = req; + ent->state = FRRS_FUSE_REQ; + list_move(&ent->list, &queue->ent_w_req_queue); + fuse_uring_add_to_pq(ent, req); +} + +/* Fetch the next fuse request if available */ +static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent) + __must_hold(&queue->lock) +{ + struct fuse_req *req; + struct fuse_ring_queue *queue = ent->queue; + struct list_head *req_queue = &queue->fuse_req_queue; + + lockdep_assert_held(&queue->lock); + + /* get and assign the next entry while it is still holding the lock */ + req = list_first_entry_or_null(req_queue, struct fuse_req, list); + if (req) + fuse_uring_add_req_to_ring_ent(ent, req); + + return req; +} + +/* + * Read data from the ring buffer, which user space has written to + * This is comparible with handling of classical write(/dev/fuse). + * Also make the ring request available again for new fuse requests. + */ +static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req, + unsigned int issue_flags) +{ + struct fuse_ring *ring = ent->queue->ring; + struct fuse_conn *fc = ring->fc; + ssize_t err = 0; + + err = copy_from_user(&req->out.h, &ent->headers->in_out, + sizeof(req->out.h)); + if (err) { + req->out.h.error = -EFAULT; + goto out; + } + + err = fuse_uring_out_header_has_err(&req->out.h, req, fc); + if (err) { + /* req->out.h.error already set */ + goto out; + } + + err = fuse_uring_copy_from_ring(ring, req, ent); +out: + fuse_uring_req_end(ent, req, err); +} + +/* + * Get the next fuse req and send it + */ +static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ent, + struct fuse_ring_queue *queue, + unsigned int issue_flags) +{ + int err; + struct fuse_req *req; + +retry: + spin_lock(&queue->lock); + fuse_uring_ent_avail(ent, queue); + req = fuse_uring_ent_assign_req(ent); + spin_unlock(&queue->lock); + + if (req) { + err = fuse_uring_send_next_to_ring(ent, req, issue_flags); + if (err) + goto retry; + } +} + +static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent) +{ + struct fuse_ring_queue *queue = ent->queue; + + lockdep_assert_held(&queue->lock); + + if (WARN_ON_ONCE(ent->state != FRRS_USERSPACE)) + return -EIO; + + ent->state = FRRS_COMMIT; + list_move(&ent->list, &queue->ent_commit_queue); + + return 0; +} + +/* FUSE_URING_CMD_COMMIT_AND_FETCH handler */ +static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags, + struct fuse_conn *fc) +{ + const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe); + struct fuse_ring_ent *ent; + int err; + struct fuse_ring *ring = fc->ring; + struct fuse_ring_queue *queue; + uint64_t commit_id = READ_ONCE(cmd_req->commit_id); + unsigned int qid = READ_ONCE(cmd_req->qid); + struct fuse_pqueue *fpq; + struct fuse_req *req; + + err = -ENOTCONN; + if (!ring) + return err; + + if (qid >= ring->nr_queues) + return -EINVAL; + + queue = ring->queues[qid]; + if (!queue) + return err; + fpq = &queue->fpq; + + if (!READ_ONCE(fc->connected) || READ_ONCE(queue->stopped)) + return err; + + spin_lock(&queue->lock); + /* Find a request based on the unique ID of the fuse request + * This should get revised, as it needs a hash calculation and list + * search. And full struct fuse_pqueue is needed (memory overhead). + * As well as the link from req to ring_ent. 
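As a hedged aside on the commit_id used in this lookup: it is simply the request's unique id, which fuse_uring_args_to_ring() stored in ring_ent_in_out.commit_id when the request was handed to the server, so the daemon only has to echo it back. Roughly, on the server side (same includes and caveats as in the register sketch further below; helper and variable names are illustrative, not part of this series):

/* Hedged sketch: commit a completed request and fetch the next one. */
static void example_prep_commit(struct io_uring_sqe *sqe, int fuse_dev_fd,
				unsigned int qid,
				const struct fuse_uring_req_header *headers)
{
	struct fuse_uring_cmd_req *req = (struct fuse_uring_cmd_req *)sqe->cmd;

	/* fuse_out_header and reply payload were already written into the
	 * buffers registered with FUSE_IO_URING_CMD_REGISTER */
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = fuse_dev_fd;
	sqe->cmd_op = FUSE_IO_URING_CMD_COMMIT_AND_FETCH;

	req->qid = qid;
	/* echo back the id the kernel stored when it sent the request */
	req->commit_id = headers->ring_ent_in_out.commit_id;
}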
+ */ + req = fuse_request_find(fpq, commit_id); + err = -ENOENT; + if (!req) { + pr_info("qid=%d commit_id %llu not found\n", queue->qid, + commit_id); + spin_unlock(&queue->lock); + return err; + } + list_del_init(&req->list); + ent = req->ring_entry; + req->ring_entry = NULL; + + err = fuse_ring_ent_set_commit(ent); + if (err != 0) { + pr_info_ratelimited("qid=%d commit_id %llu state %d", + queue->qid, commit_id, ent->state); + spin_unlock(&queue->lock); + req->out.h.error = err; + clear_bit(FR_SENT, &req->flags); + fuse_request_end(req); + return err; + } + + ent->cmd = cmd; + spin_unlock(&queue->lock); + + /* without the queue lock, as other locks are taken */ + fuse_uring_prepare_cancel(cmd, issue_flags, ent); + fuse_uring_commit(ent, req, issue_flags); + + /* + * Fetching the next request is absolutely required as queued + * fuse requests would otherwise not get processed - committing + * and fetching is done in one step vs legacy fuse, which has separated + * read (fetch request) and write (commit result). + */ + fuse_uring_next_fuse_req(ent, queue, issue_flags); + return 0; +} + +static bool is_ring_ready(struct fuse_ring *ring, int current_qid) +{ + int qid; + struct fuse_ring_queue *queue; + bool ready = true; + + for (qid = 0; qid < ring->nr_queues && ready; qid++) { + if (current_qid == qid) + continue; + + queue = ring->queues[qid]; + if (!queue) { + ready = false; + break; + } + + spin_lock(&queue->lock); + if (list_empty(&queue->ent_avail_queue)) + ready = false; + spin_unlock(&queue->lock); + } + + return ready; +} + +/* + * fuse_uring_req_fetch command handling + */ +static void fuse_uring_do_register(struct fuse_ring_ent *ent, + struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct fuse_ring_queue *queue = ent->queue; + struct fuse_ring *ring = queue->ring; + struct fuse_conn *fc = ring->fc; + struct fuse_iqueue *fiq = &fc->iq; + + fuse_uring_prepare_cancel(cmd, issue_flags, ent); + + spin_lock(&queue->lock); + ent->cmd = cmd; + fuse_uring_ent_avail(ent, queue); + spin_unlock(&queue->lock); + + if (!ring->ready) { + bool ready = is_ring_ready(ring, queue->qid); + + if (ready) { + WRITE_ONCE(fiq->ops, &fuse_io_uring_ops); + WRITE_ONCE(ring->ready, true); + wake_up_all(&fc->blocked_waitq); + } + } +} + +/* + * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1] + * the payload + */ +static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe, + struct iovec iov[FUSE_URING_IOV_SEGS]) +{ + struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr)); + struct iov_iter iter; + ssize_t ret; + + if (sqe->len != FUSE_URING_IOV_SEGS) + return -EINVAL; + + /* + * Direction for buffer access will actually be READ and WRITE, + * using write for the import should include READ access as well. 
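For context, a rough userspace sketch of what this import expects: a 128-byte SQE carrying FUSE_IO_URING_CMD_REGISTER whose addr field points at exactly two iovecs, the headers buffer first and the payload buffer second. Only the names visible in this series (IORING_OP_URING_CMD, FUSE_IO_URING_CMD_REGISTER, struct fuse_uring_req_header, struct fuse_uring_cmd_req and its qid field) are taken from the patch; the header location in the uapi and all helper names below are assumptions.

#include <stdint.h>
#include <string.h>
#include <sys/uio.h>
#include <linux/io_uring.h>
#include <linux/fuse.h>	/* assumed to carry the FUSE_IO_URING_* uapi of this series */

/* Hedged sketch: register one ring entry for a given queue. */
static void example_prep_register(struct io_uring_sqe *sqe, int fuse_dev_fd,
				  unsigned int qid, struct iovec iov[2])
{
	struct fuse_uring_cmd_req *req;

	memset(sqe, 0, 128);			/* a SQE128 ring is required */
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = fuse_dev_fd;			/* the opened fuse device */
	sqe->cmd_op = FUSE_IO_URING_CMD_REGISTER;
	sqe->addr = (uint64_t)(uintptr_t)iov;	/* iov[0] = headers, iov[1] = payload */
	sqe->len = 2;				/* FUSE_URING_IOV_SEGS: headers + payload */

	req = (struct fuse_uring_cmd_req *)sqe->cmd;	/* big-SQE command area */
	req->qid = qid;					/* one queue per CPU core */
}

The caller is expected to point iov[0] at a buffer of at least sizeof(struct fuse_uring_req_header) and iov[1] at a payload buffer of at least the ring's max payload size, matching the checks in fuse_uring_create_ring_ent() below.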
+ */ + ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS, + FUSE_URING_IOV_SEGS, &iov, &iter); + if (ret < 0) + return ret; + + return 0; +} + +static struct fuse_ring_ent * +fuse_uring_create_ring_ent(struct io_uring_cmd *cmd, + struct fuse_ring_queue *queue) +{ + struct fuse_ring *ring = queue->ring; + struct fuse_ring_ent *ent; + size_t payload_size; + struct iovec iov[FUSE_URING_IOV_SEGS]; + int err; + + err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov); + if (err) { + pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n", + err); + return ERR_PTR(err); + } + + err = -EINVAL; + if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) { + pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len); + return ERR_PTR(err); + } + + payload_size = iov[1].iov_len; + if (payload_size < ring->max_payload_sz) { + pr_info_ratelimited("Invalid req payload len %zu\n", + payload_size); + return ERR_PTR(err); + } + + err = -ENOMEM; + ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT); + if (!ent) + return ERR_PTR(err); + + INIT_LIST_HEAD(&ent->list); + + ent->queue = queue; + ent->headers = iov[0].iov_base; + ent->payload = iov[1].iov_base; + + atomic_inc(&ring->queue_refs); + return ent; +} + +/* + * Register header and payload buffer with the kernel and puts the + * entry as "ready to get fuse requests" on the queue + */ +static int fuse_uring_register(struct io_uring_cmd *cmd, + unsigned int issue_flags, struct fuse_conn *fc) +{ + const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe); + struct fuse_ring *ring = fc->ring; + struct fuse_ring_queue *queue; + struct fuse_ring_ent *ent; + int err; + unsigned int qid = READ_ONCE(cmd_req->qid); + + err = -ENOMEM; + if (!ring) { + ring = fuse_uring_create(fc); + if (!ring) + return err; + } + + if (qid >= ring->nr_queues) { + pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid); + return -EINVAL; + } + + queue = ring->queues[qid]; + if (!queue) { + queue = fuse_uring_create_queue(ring, qid); + if (!queue) + return err; + } + + /* + * The created queue above does not need to be destructed in + * case of entry errors below, will be done at ring destruction time. 
+ */ + + ent = fuse_uring_create_ring_ent(cmd, queue); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + fuse_uring_do_register(ent, cmd, issue_flags); + + return 0; +} + +/* + * Entry function from io_uring to handle the given passthrough command + * (op code IORING_OP_URING_CMD) + */ +int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ + struct fuse_dev *fud; + struct fuse_conn *fc; + u32 cmd_op = cmd->cmd_op; + int err; + + if ((unlikely(issue_flags & IO_URING_F_CANCEL))) { + fuse_uring_cancel(cmd, issue_flags); + return 0; + } + + /* This extra SQE size holds struct fuse_uring_cmd_req */ + if (!(issue_flags & IO_URING_F_SQE128)) + return -EINVAL; + + fud = fuse_get_dev(cmd->file); + if (!fud) { + pr_info_ratelimited("No fuse device found\n"); + return -ENOTCONN; + } + fc = fud->fc; + + /* Once a connection has io-uring enabled on it, it can't be disabled */ + if (!enable_uring && !fc->io_uring) { + pr_info_ratelimited("fuse-io-uring is disabled\n"); + return -EOPNOTSUPP; + } + + if (fc->aborted) + return -ECONNABORTED; + if (!fc->connected) + return -ENOTCONN; + + /* + * fuse_uring_register() needs the ring to be initialized, + * we need to know the max payload size + */ + if (!fc->initialized) + return -EAGAIN; + + switch (cmd_op) { + case FUSE_IO_URING_CMD_REGISTER: + err = fuse_uring_register(cmd, issue_flags, fc); + if (err) { + pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n", + err); + fc->io_uring = 0; + wake_up_all(&fc->blocked_waitq); + return err; + } + break; + case FUSE_IO_URING_CMD_COMMIT_AND_FETCH: + err = fuse_uring_commit_fetch(cmd, issue_flags, fc); + if (err) { + pr_info_once("FUSE_IO_URING_COMMIT_AND_FETCH failed err=%d\n", + err); + return err; + } + break; + default: + return -EINVAL; + } + + return -EIOCBQUEUED; +} + +static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd, + ssize_t ret, unsigned int issue_flags) +{ + struct fuse_ring_queue *queue = ent->queue; + + spin_lock(&queue->lock); + ent->state = FRRS_USERSPACE; + list_move(&ent->list, &queue->ent_in_userspace); + ent->cmd = NULL; + spin_unlock(&queue->lock); + + io_uring_cmd_done(cmd, ret, 0, issue_flags); +} + +/* + * This prepares and sends the ring request in fuse-uring task context. + * User buffers are not mapped yet - the application does not have permission + * to write to it - this has to be executed in ring task context. 
+ */ +static void fuse_uring_send_in_task(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd); + struct fuse_ring_queue *queue = ent->queue; + int err; + + if (!(issue_flags & IO_URING_F_TASK_DEAD)) { + err = fuse_uring_prepare_send(ent, ent->fuse_req); + if (err) { + fuse_uring_next_fuse_req(ent, queue, issue_flags); + return; + } + } else { + err = -ECANCELED; + } + + fuse_uring_send(ent, cmd, err, issue_flags); +} + +static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring) +{ + unsigned int qid; + struct fuse_ring_queue *queue; + + qid = task_cpu(current); + + if (WARN_ONCE(qid >= ring->nr_queues, + "Core number (%u) exceeds nr queues (%zu)\n", qid, + ring->nr_queues)) + qid = 0; + + queue = ring->queues[qid]; + WARN_ONCE(!queue, "Missing queue for qid %d\n", qid); + + return queue; +} + +static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent) +{ + struct io_uring_cmd *cmd = ent->cmd; + + uring_cmd_set_ring_ent(cmd, ent); + io_uring_cmd_complete_in_task(cmd, fuse_uring_send_in_task); +} + +/* queue a fuse request and send it if a ring entry is available */ +void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req) +{ + struct fuse_conn *fc = req->fm->fc; + struct fuse_ring *ring = fc->ring; + struct fuse_ring_queue *queue; + struct fuse_ring_ent *ent = NULL; + int err; + + err = -EINVAL; + queue = fuse_uring_task_to_queue(ring); + if (!queue) + goto err; + + if (req->in.h.opcode != FUSE_NOTIFY_REPLY) + req->in.h.unique = fuse_get_unique(fiq); + + spin_lock(&queue->lock); + err = -ENOTCONN; + if (unlikely(queue->stopped)) + goto err_unlock; + + ent = list_first_entry_or_null(&queue->ent_avail_queue, + struct fuse_ring_ent, list); + if (ent) + fuse_uring_add_req_to_ring_ent(ent, req); + else + list_add_tail(&req->list, &queue->fuse_req_queue); + spin_unlock(&queue->lock); + + if (ent) + fuse_uring_dispatch_ent(ent); + + return; + +err_unlock: + spin_unlock(&queue->lock); +err: + req->out.h.error = err; + clear_bit(FR_PENDING, &req->flags); + fuse_request_end(req); +} + +bool fuse_uring_queue_bq_req(struct fuse_req *req) +{ + struct fuse_conn *fc = req->fm->fc; + struct fuse_ring *ring = fc->ring; + struct fuse_ring_queue *queue; + struct fuse_ring_ent *ent = NULL; + + queue = fuse_uring_task_to_queue(ring); + if (!queue) + return false; + + spin_lock(&queue->lock); + if (unlikely(queue->stopped)) { + spin_unlock(&queue->lock); + return false; + } + + list_add_tail(&req->list, &queue->fuse_req_bg_queue); + + ent = list_first_entry_or_null(&queue->ent_avail_queue, + struct fuse_ring_ent, list); + spin_lock(&fc->bg_lock); + fc->num_background++; + if (fc->num_background == fc->max_background) + fc->blocked = 1; + fuse_uring_flush_bg(queue); + spin_unlock(&fc->bg_lock); + + /* + * Due to bg_queue flush limits there might be other bg requests + * in the queue that need to be handled first. Or no further req + * might be available. 
+ */ + req = list_first_entry_or_null(&queue->fuse_req_queue, struct fuse_req, + list); + if (ent && req) { + fuse_uring_add_req_to_ring_ent(ent, req); + spin_unlock(&queue->lock); + + fuse_uring_dispatch_ent(ent); + } else { + spin_unlock(&queue->lock); + } + + return true; +} + +static const struct fuse_iqueue_ops fuse_io_uring_ops = { + /* should be sent over io-uring as enhancement */ + .send_forget = fuse_dev_queue_forget, + + /* + * could be sent over io-uring, but interrupts should be rare, + * no need to make the code complex + */ + .send_interrupt = fuse_dev_queue_interrupt, + .send_req = fuse_uring_queue_fuse_req, +}; diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h new file mode 100644 index 000000000000..2102b3d0c1ae --- /dev/null +++ b/fs/fuse/dev_uring_i.h @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * FUSE: Filesystem in Userspace + * Copyright (c) 2023-2024 DataDirect Networks. + */ + +#ifndef _FS_FUSE_DEV_URING_I_H +#define _FS_FUSE_DEV_URING_I_H + +#include "fuse_i.h" + +#ifdef CONFIG_FUSE_IO_URING + +#define FUSE_URING_TEARDOWN_TIMEOUT (5 * HZ) +#define FUSE_URING_TEARDOWN_INTERVAL (HZ/20) + +enum fuse_ring_req_state { + FRRS_INVALID = 0, + + /* The ring entry received from userspace and it is being processed */ + FRRS_COMMIT, + + /* The ring entry is waiting for new fuse requests */ + FRRS_AVAILABLE, + + /* The ring entry got assigned a fuse req */ + FRRS_FUSE_REQ, + + /* The ring entry is in or on the way to user space */ + FRRS_USERSPACE, + + /* The ring entry is in teardown */ + FRRS_TEARDOWN, + + /* The ring entry is released, but not freed yet */ + FRRS_RELEASED, +}; + +/** A fuse ring entry, part of the ring queue */ +struct fuse_ring_ent { + /* userspace buffer */ + struct fuse_uring_req_header __user *headers; + void __user *payload; + + /* the ring queue that owns the request */ + struct fuse_ring_queue *queue; + + /* fields below are protected by queue->lock */ + + struct io_uring_cmd *cmd; + + struct list_head list; + + enum fuse_ring_req_state state; + + struct fuse_req *fuse_req; +}; + +struct fuse_ring_queue { + /* + * back pointer to the main fuse uring structure that holds this + * queue + */ + struct fuse_ring *ring; + + /* queue id, corresponds to the cpu core */ + unsigned int qid; + + /* + * queue lock, taken when any value in the queue changes _and_ also + * a ring entry state changes.
+ */ + spinlock_t lock; + + /* available ring entries (struct fuse_ring_ent) */ + struct list_head ent_avail_queue; + + /* + * entries in the process of being committed or in the process + * to be sent to userspace + */ + struct list_head ent_w_req_queue; + struct list_head ent_commit_queue; + + /* entries in userspace */ + struct list_head ent_in_userspace; + + /* entries that are released */ + struct list_head ent_released; + + /* fuse requests waiting for an entry slot */ + struct list_head fuse_req_queue; + + /* background fuse requests */ + struct list_head fuse_req_bg_queue; + + struct fuse_pqueue fpq; + + unsigned int active_background; + + bool stopped; +}; + +/** + * Describes if uring is for communication and holds alls the data needed + * for uring communication + */ +struct fuse_ring { + /* back pointer */ + struct fuse_conn *fc; + + /* number of ring queues */ + size_t nr_queues; + + /* maximum payload/arg size */ + size_t max_payload_sz; + + struct fuse_ring_queue **queues; + + /* + * Log ring entry states on stop when entries cannot be released + */ + unsigned int stop_debug_log : 1; + + wait_queue_head_t stop_waitq; + + /* async tear down */ + struct delayed_work async_teardown_work; + + /* log */ + unsigned long teardown_time; + + atomic_t queue_refs; + + bool ready; +}; + +bool fuse_uring_enabled(void); +void fuse_uring_destruct(struct fuse_conn *fc); +void fuse_uring_stop_queues(struct fuse_ring *ring); +void fuse_uring_abort_end_requests(struct fuse_ring *ring); +int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags); +void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req); +bool fuse_uring_queue_bq_req(struct fuse_req *req); + +static inline void fuse_uring_abort(struct fuse_conn *fc) +{ + struct fuse_ring *ring = fc->ring; + + if (ring == NULL) + return; + + if (atomic_read(&ring->queue_refs) > 0) { + fuse_uring_abort_end_requests(ring); + fuse_uring_stop_queues(ring); + } +} + +static inline void fuse_uring_wait_stopped_queues(struct fuse_conn *fc) +{ + struct fuse_ring *ring = fc->ring; + + if (ring) + wait_event(ring->stop_waitq, + atomic_read(&ring->queue_refs) == 0); +} + +static inline bool fuse_uring_ready(struct fuse_conn *fc) +{ + return fc->ring && fc->ring->ready; +} + +#else /* CONFIG_FUSE_IO_URING */ + +struct fuse_ring; + +static inline void fuse_uring_create(struct fuse_conn *fc) +{ +} + +static inline void fuse_uring_destruct(struct fuse_conn *fc) +{ +} + +static inline bool fuse_uring_enabled(void) +{ + return false; +} + +static inline void fuse_uring_abort(struct fuse_conn *fc) +{ +} + +static inline void fuse_uring_wait_stopped_queues(struct fuse_conn *fc) +{ +} + +static inline bool fuse_uring_ready(struct fuse_conn *fc) +{ + return false; +} + +#endif /* CONFIG_FUSE_IO_URING */ + +#endif /* _FS_FUSE_DEV_URING_I_H */ diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index bf057cf7098d..198862b086ff 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -175,9 +175,12 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, memset(outarg, 0, sizeof(struct fuse_entry_out)); args->opcode = FUSE_LOOKUP; args->nodeid = nodeid; - args->in_numargs = 1; - args->in_args[0].size = name->len + 1; - args->in_args[0].value = name->name; + args->in_numargs = 3; + fuse_set_zero_arg0(args); + args->in_args[1].size = name->len; + args->in_args[1].value = name->name; + args->in_args[2].size = 1; + args->in_args[2].value = ""; args->out_numargs = 1; args->out_args[0].size = sizeof(struct fuse_entry_out); 
args->out_args[0].value = outarg; @@ -192,10 +195,10 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, * the lookup once more. If the lookup results in the same inode, * then refresh the attributes, timeouts and mark the dentry valid. */ -static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) +static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *entry, unsigned int flags) { struct inode *inode; - struct dentry *parent; struct fuse_mount *fm; struct fuse_inode *fi; int ret; @@ -227,11 +230,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) attr_version = fuse_get_attr_version(fm->fc); - parent = dget_parent(entry); - fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)), - &entry->d_name, &outarg); + fuse_lookup_init(fm->fc, &args, get_node_id(dir), + name, &outarg); ret = fuse_simple_request(fm, &args); - dput(parent); /* Zero nodeid is same as -ENOENT */ if (!ret && !outarg.nodeid) ret = -ENOENT; @@ -265,9 +266,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state)) return -ECHILD; } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) { - parent = dget_parent(entry); - fuse_advise_use_readdirplus(d_inode(parent)); - dput(parent); + fuse_advise_use_readdirplus(dir); } } ret = 1; @@ -929,11 +928,12 @@ static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir, FUSE_ARGS(args); args.opcode = FUSE_SYMLINK; - args.in_numargs = 2; - args.in_args[0].size = entry->d_name.len + 1; - args.in_args[0].value = entry->d_name.name; - args.in_args[1].size = len; - args.in_args[1].value = link; + args.in_numargs = 3; + fuse_set_zero_arg0(&args); + args.in_args[1].size = entry->d_name.len + 1; + args.in_args[1].value = entry->d_name.name; + args.in_args[2].size = len; + args.in_args[2].value = link; return create_new_entry(idmap, fm, &args, dir, entry, S_IFLNK); } @@ -993,9 +993,10 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) args.opcode = FUSE_UNLINK; args.nodeid = get_node_id(dir); - args.in_numargs = 1; - args.in_args[0].size = entry->d_name.len + 1; - args.in_args[0].value = entry->d_name.name; + args.in_numargs = 2; + fuse_set_zero_arg0(&args); + args.in_args[1].size = entry->d_name.len + 1; + args.in_args[1].value = entry->d_name.name; err = fuse_simple_request(fm, &args); if (!err) { fuse_dir_changed(dir); @@ -1016,9 +1017,10 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) args.opcode = FUSE_RMDIR; args.nodeid = get_node_id(dir); - args.in_numargs = 1; - args.in_args[0].size = entry->d_name.len + 1; - args.in_args[0].value = entry->d_name.name; + args.in_numargs = 2; + fuse_set_zero_arg0(&args); + args.in_args[1].size = entry->d_name.len + 1; + args.in_args[1].value = entry->d_name.name; err = fuse_simple_request(fm, &args); if (!err) { fuse_dir_changed(dir); diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h new file mode 100644 index 000000000000..3b2bfe1248d3 --- /dev/null +++ b/fs/fuse/fuse_dev_i.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * FUSE: Filesystem in Userspace + * Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> + */ +#ifndef _FS_FUSE_DEV_I_H +#define _FS_FUSE_DEV_I_H + +#include <linux/types.h> + +/* Ordinary requests have even IDs, while interrupts IDs are odd */ +#define FUSE_INT_REQ_BIT (1ULL << 0) +#define FUSE_REQ_ID_STEP (1ULL << 1) + +struct fuse_arg; +struct fuse_args; +struct fuse_pqueue; 
+struct fuse_req; +struct fuse_iqueue; +struct fuse_forget_link; + +struct fuse_copy_state { + int write; + struct fuse_req *req; + struct iov_iter *iter; + struct pipe_buffer *pipebufs; + struct pipe_buffer *currbuf; + struct pipe_inode_info *pipe; + unsigned long nr_segs; + struct page *pg; + unsigned int len; + unsigned int offset; + unsigned int move_pages:1; + unsigned int is_uring:1; + struct { + unsigned int copied_sz; /* copied size into the user buffer */ + } ring; +}; + +static inline struct fuse_dev *fuse_get_dev(struct file *file) +{ + /* + * Lockless access is OK, because file->private data is set + * once during mount and is valid until the file is released. + */ + return READ_ONCE(file->private_data); +} + +unsigned int fuse_req_hash(u64 unique); +struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique); + +void fuse_dev_end_requests(struct list_head *head); + +void fuse_copy_init(struct fuse_copy_state *cs, int write, + struct iov_iter *iter); +int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs, + unsigned int argpages, struct fuse_arg *args, + int zeroing); +int fuse_copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args, + unsigned int nbytes); +void fuse_dev_queue_forget(struct fuse_iqueue *fiq, + struct fuse_forget_link *forget); +void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req); + +#endif + diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 74744c6f2860..fee96fe7887b 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -310,7 +310,7 @@ struct fuse_args { bool is_ext:1; bool is_pinned:1; bool invalidate_vmap:1; - struct fuse_in_arg in_args[3]; + struct fuse_in_arg in_args[4]; struct fuse_arg out_args[2]; void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); /* Used for kvec iter backed by vmalloc address */ @@ -438,6 +438,10 @@ struct fuse_req { /** fuse_mount this request belongs to */ struct fuse_mount *fm; + +#ifdef CONFIG_FUSE_IO_URING + void *ring_entry; +#endif }; struct fuse_iqueue; @@ -863,6 +867,9 @@ struct fuse_conn { /* Use pages instead of pointer for kernel I/O */ unsigned int use_pages_for_kvec_io:1; + /* Use io_uring for communication */ + unsigned int io_uring; + /** Maximum stack depth for passthrough backing files */ int max_stack_depth; @@ -923,6 +930,11 @@ struct fuse_conn { /** IDR for backing files ids */ struct idr backing_files_map; #endif + +#ifdef CONFIG_FUSE_IO_URING + /** uring connection information*/ + struct fuse_ring *ring; +#endif }; /* @@ -947,6 +959,19 @@ struct fuse_mount { struct rcu_head rcu; }; +/* + * Empty header for FUSE opcodes without specific header needs. + * Used as a placeholder in args->in_args[0] for consistency + * across all FUSE operations, simplifying request handling. 
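A hedged aside on how this placeholder interacts with the io-uring transport added above: fuse_uring_args_to_ring() copies in_args[0] into the per-op header area (headers->op_in) and the remaining args into the payload buffer, so giving every opcode a possibly empty arg0 keeps that split uniform. Only the member names visible in this series (in_out, op_in, ring_ent_in_out) are taken from the patch; the decode helper below is purely illustrative.

/* Hedged sketch: decoding a name-only request (e.g. FUSE_UNLINK) from a ring entry. */
static const char *example_decode_name_only(const struct fuse_uring_req_header *h,
					    const char *payload)
{
	/* generic request header, same as with read(/dev/fuse) */
	const struct fuse_in_header *in = (const void *)&h->in_out;

	if (in->opcode != FUSE_UNLINK)
		return NULL;
	/* op_in holds the empty fuse_zero_header for this opcode ... */
	/* ... so the payload starts directly with the NUL-terminated name */
	return payload;
}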
+ */ +struct fuse_zero_header {}; + +static inline void fuse_set_zero_arg0(struct fuse_args *args) +{ + args->in_args[0].size = sizeof(struct fuse_zero_header); + args->in_args[0].value = NULL; +} + static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb) { return sb->s_fs_info; @@ -1220,6 +1245,11 @@ void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o); struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); /** + * Initialize the fuse processing queue + */ +void fuse_pqueue_init(struct fuse_pqueue *fpq); + +/** * Initialize fuse_conn */ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3ce4f4e81d09..e9db2cb8c150 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -7,6 +7,7 @@ */ #include "fuse_i.h" +#include "dev_uring_i.h" #include <linux/pagemap.h> #include <linux/slab.h> @@ -937,7 +938,7 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq, fiq->priv = priv; } -static void fuse_pqueue_init(struct fuse_pqueue *fpq) +void fuse_pqueue_init(struct fuse_pqueue *fpq) { unsigned int i; @@ -992,6 +993,8 @@ static void delayed_release(struct rcu_head *p) { struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu); + fuse_uring_destruct(fc); + put_user_ns(fc->user_ns); fc->release(fc); } @@ -1387,6 +1390,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, else ok = false; } + if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled()) + fc->io_uring = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1446,6 +1451,13 @@ void fuse_send_init(struct fuse_mount *fm) if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) flags |= FUSE_PASSTHROUGH; + /* + * This is just an information flag for fuse server. No need to check + * the reply - server is either sending IORING_OP_URING_CMD or not. 
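For completeness, a hedged sketch of the server half of this handshake: the kernel only advertises the capability in the INIT request, and fc->io_uring is set in process_init_reply() above when the server echoes the flag back. struct fuse_init_in and FUSE_OVER_IO_URING come from the fuse uapi; the function and variable names below are made up.

#include <stdbool.h>
#include <stdint.h>
#include <linux/fuse.h>	/* assumed to define FUSE_OVER_IO_URING in this series */

/* Hedged sketch: decide whether to bring up ring queues after FUSE_INIT. */
static bool example_handle_init(const struct fuse_init_in *in,
				uint64_t *reply_flags, bool want_io_uring)
{
	uint64_t kflags = in->flags | ((uint64_t)in->flags2 << 32);

	if (!(kflags & FUSE_OVER_IO_URING) || !want_io_uring)
		return false;

	*reply_flags |= FUSE_OVER_IO_URING;	/* lets the kernel set fc->io_uring */
	return true;				/* caller then submits REGISTER SQEs */
}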
+ */ + if (fuse_uring_enabled()) + flags |= FUSE_OVER_IO_URING; + ia->in.flags = flags; ia->in.flags2 = flags >> 32; diff --git a/fs/fuse/sysctl.c b/fs/fuse/sysctl.c index b272bb333005..63fb1e5bee30 100644 --- a/fs/fuse/sysctl.c +++ b/fs/fuse/sysctl.c @@ -13,7 +13,7 @@ static struct ctl_table_header *fuse_table_header; /* Bound by fuse_init_out max_pages, which is a u16 */ static unsigned int sysctl_fuse_max_pages_limit = 65535; -static struct ctl_table fuse_sysctl_table[] = { +static const struct ctl_table fuse_sysctl_table[] = { { .procname = "max_pages_limit", .data = &fuse_max_pages_limit, diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 9f568d345c51..93dfb06b6cea 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -164,9 +164,10 @@ int fuse_removexattr(struct inode *inode, const char *name) args.opcode = FUSE_REMOVEXATTR; args.nodeid = get_node_id(inode); - args.in_numargs = 1; - args.in_args[0].size = strlen(name) + 1; - args.in_args[0].value = name; + args.in_numargs = 2; + fuse_set_zero_arg0(&args); + args.in_args[1].size = strlen(name) + 1; + args.in_args[1].value = name; err = fuse_simple_request(fm, &args); if (err == -ENOSYS) { fm->fc->no_removexattr = 1; diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 2e215e8c3c88..95050e719233 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -21,7 +21,9 @@ /** * gfs2_drevalidate - Check directory lookup consistency - * @dentry: the mapping to check + * @dir: expected parent directory inode + * @name: expected name + * @dentry: dentry to check * @flags: lookup flags * * Check to make sure the lookup necessary to arrive at this inode from its @@ -30,50 +32,43 @@ * Returns: 1 if the dentry is ok, 0 if it isn't */ -static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) +static int gfs2_drevalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - struct dentry *parent; - struct gfs2_sbd *sdp; - struct gfs2_inode *dip; + struct gfs2_sbd *sdp = GFS2_SB(dir); + struct gfs2_inode *dip = GFS2_I(dir); struct inode *inode; struct gfs2_holder d_gh; struct gfs2_inode *ip = NULL; - int error, valid = 0; + int error, valid; int had_lock = 0; if (flags & LOOKUP_RCU) return -ECHILD; - parent = dget_parent(dentry); - sdp = GFS2_SB(d_inode(parent)); - dip = GFS2_I(d_inode(parent)); inode = d_inode(dentry); if (inode) { if (is_bad_inode(inode)) - goto out; + return 0; ip = GFS2_I(inode); } - if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) { - valid = 1; - goto out; - } + if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) + return 1; had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); if (!had_lock) { error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); if (error) - goto out; + return 0; } - error = gfs2_dir_check(d_inode(parent), &dentry->d_name, ip); + error = gfs2_dir_check(dir, name, ip); valid = inode ?
!error : (error == -ENOENT); if (!had_lock) gfs2_glock_dq_uninit(&d_gh); -out: - dput(parent); return valid; } diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c index 76fa02e3835b..ef54fc8093cf 100644 --- a/fs/hfs/sysdep.c +++ b/fs/hfs/sysdep.c @@ -13,7 +13,8 @@ /* dentry case-handling: just lowercase everything */ -static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags) +static int hfs_revalidate_dentry(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; int diff; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 7e51d2cec64b..e0741e468956 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -95,32 +95,17 @@ __uml_setup("hostfs=", hostfs_args, static char *__dentry_name(struct dentry *dentry, char *name) { char *p = dentry_path_raw(dentry, name, PATH_MAX); - char *root; - size_t len; - struct hostfs_fs_info *fsi; - - fsi = dentry->d_sb->s_fs_info; - root = fsi->host_root_path; - len = strlen(root); - if (IS_ERR(p)) { - __putname(name); - return NULL; - } + struct hostfs_fs_info *fsi = dentry->d_sb->s_fs_info; + char *root = fsi->host_root_path; + size_t len = strlen(root); - /* - * This function relies on the fact that dentry_path_raw() will place - * the path name at the end of the provided buffer. - */ - BUG_ON(p + strlen(p) + 1 != name + PATH_MAX); - - strscpy(name, root, PATH_MAX); - if (len > p - name) { + if (IS_ERR(p) || len > p - name) { __putname(name); return NULL; } - if (p > name + len) - strcpy(name + len, p); + memcpy(name, root, len); + memmove(name + len, p, name + PATH_MAX - p); return name; } @@ -410,38 +395,33 @@ static const struct file_operations hostfs_dir_fops = { .fsync = hostfs_fsync, }; -static int hostfs_writepage(struct page *page, struct writeback_control *wbc) +static int hostfs_writepages(struct address_space *mapping, + struct writeback_control *wbc) { - struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - char *buffer; - loff_t base = page_offset(page); - int count = PAGE_SIZE; - int end_index = inode->i_size >> PAGE_SHIFT; - int err; - - if (page->index >= end_index) - count = inode->i_size & (PAGE_SIZE-1); - - buffer = kmap_local_page(page); - - err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); - if (err != count) { - if (err >= 0) - err = -EIO; - mapping_set_error(mapping, err); - goto out; + struct folio *folio = NULL; + loff_t i_size = i_size_read(inode); + int err = 0; + + while ((folio = writeback_iter(mapping, wbc, folio, &err))) { + loff_t pos = folio_pos(folio); + size_t count = folio_size(folio); + char *buffer; + int ret; + + if (count > i_size - pos) + count = i_size - pos; + + buffer = kmap_local_folio(folio, 0); + ret = write_file(HOSTFS_I(inode)->fd, &pos, buffer, count); + kunmap_local(buffer); + folio_unlock(folio); + if (ret != count) { + err = ret < 0 ? 
ret : -EIO; + mapping_set_error(mapping, err); + } } - if (base > inode->i_size) - inode->i_size = base; - - err = 0; - - out: - kunmap_local(buffer); - unlock_page(page); - return err; } @@ -506,11 +486,12 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping, } static const struct address_space_operations hostfs_aops = { - .writepage = hostfs_writepage, + .writepages = hostfs_writepages, .read_folio = hostfs_read_folio, .dirty_folio = filemap_dirty_folio, .write_begin = hostfs_write_begin, .write_end = hostfs_write_end, + .migrate_folio = filemap_migrate_folio, }; static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st) diff --git a/fs/inode.c b/fs/inode.c index 6b4c77268fc0..5587aabdaa5e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -184,7 +184,7 @@ static int proc_nr_inodes(const struct ctl_table *table, int write, void *buffer return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } -static struct ctl_table inodes_sysctls[] = { +static const struct ctl_table inodes_sysctls[] = { { .procname = "inode-nr", .data = &inodes_stat, diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index d68a4e6ac345..fc8ede43afde 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1576,7 +1576,8 @@ out: return result; } -static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags) +static int jfs_ci_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { /* * This is not negative dentry. Always valid. diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 458519e416fe..5f0f8b95f44c 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1109,7 +1109,8 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, return ERR_PTR(rc); } -static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) +static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct kernfs_node *kn; struct kernfs_root *root; diff --git a/fs/libfs.c b/fs/libfs.c index 5b6120b19e99..8444f5cc4064 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1782,7 +1782,7 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, { const struct dentry *parent; const struct inode *dir; - char strbuf[DNAME_INLINE_LEN]; + union shortname_store strbuf; struct qstr qstr; /* @@ -1802,22 +1802,23 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, if (!dir || !IS_CASEFOLDED(dir)) return 1; + qstr.len = len; + qstr.name = str; /* * If the dentry name is stored in-line, then it may be concurrently * modified by a rename. If this happens, the VFS will eventually retry * the lookup, so it doesn't matter what ->d_compare() returns. * However, it's unsafe to call utf8_strncasecmp() with an unstable * string. Therefore, we have to copy the name into a temporary buffer. + * As above, len is guaranteed to match str, so the shortname case + * is exactly when str points to ->d_shortname. 
*/ - if (len <= DNAME_INLINE_LEN - 1) { - memcpy(strbuf, str, len); - strbuf[len] = 0; - str = strbuf; + if (qstr.name == dentry->d_shortname.string) { + strbuf = dentry->d_shortname; // NUL is guaranteed to be in there + qstr.name = strbuf.string; /* prevent compiler from optimizing out the temporary buffer */ barrier(); } - qstr.len = len; - qstr.name = str; return utf8_strncasecmp(dentry->d_sb->s_encoding, name, &qstr); } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 7ded57ec3a60..2c8eedc6c2cc 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -412,7 +412,7 @@ EXPORT_SYMBOL_GPL(lockd_down); * Sysctl parameters (same as module parameters, different interface). */ -static struct ctl_table nlm_sysctls[] = { +static const struct ctl_table nlm_sysctls[] = { { .procname = "nlm_grace_period", .data = &nlm_grace_period, diff --git a/fs/locks.c b/fs/locks.c index 25afc8d9c9d1..1619cddfa7a4 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -97,7 +97,7 @@ static int leases_enable = 1; static int lease_break_time = 45; #ifdef CONFIG_SYSCTL -static struct ctl_table locks_sysctls[] = { +static const struct ctl_table locks_sysctls[] = { { .procname = "leases-enable", .data = &leases_enable, diff --git a/fs/namei.c b/fs/namei.c index e56c29a22d26..3ab9440c5b93 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -921,10 +921,11 @@ out_dput: return false; } -static inline int d_revalidate(struct dentry *dentry, unsigned int flags) +static inline int d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - return dentry->d_op->d_revalidate(dentry, flags); + return dentry->d_op->d_revalidate(dir, name, dentry, flags); else return 1; } @@ -1099,7 +1100,7 @@ static int sysctl_protected_fifos __read_mostly; static int sysctl_protected_regular __read_mostly; #ifdef CONFIG_SYSCTL -static struct ctl_table namei_sysctls[] = { +static const struct ctl_table namei_sysctls[] = { { .procname = "protected_symlinks", .data = &sysctl_protected_symlinks, @@ -1652,7 +1653,7 @@ static struct dentry *lookup_dcache(const struct qstr *name, { struct dentry *dentry = d_lookup(dir, name); if (dentry) { - int error = d_revalidate(dentry, flags); + int error = d_revalidate(dir->d_inode, name, dentry, flags); if (unlikely(error <= 0)) { if (!error) d_invalidate(dentry); @@ -1737,19 +1738,20 @@ static struct dentry *lookup_fast(struct nameidata *nd) if (read_seqcount_retry(&parent->d_seq, nd->seq)) return ERR_PTR(-ECHILD); - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); if (likely(status > 0)) return dentry; if (!try_to_unlazy_next(nd, dentry)) return ERR_PTR(-ECHILD); if (status == -ECHILD) /* we'd been told to redo it in non-rcu mode */ - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, + dentry, nd->flags); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return NULL; - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); } if (unlikely(status <= 0)) { if (!status) @@ -1777,7 +1779,7 @@ again: if (IS_ERR(dentry)) return dentry; if (unlikely(!d_in_lookup(dentry))) { - int error = d_revalidate(dentry, flags); + int error = d_revalidate(inode, name, dentry, flags); if (unlikely(error <= 0)) { if (!error) { d_invalidate(dentry); @@ -3575,7 +3577,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, if (d_in_lookup(dentry)) break; - error 
= d_revalidate(dentry, nd->flags); + error = d_revalidate(dir_inode, &nd->last, dentry, nd->flags); if (likely(error > 0)) break; if (error) diff --git a/fs/namespace.c b/fs/namespace.c index 4013fbac354a..a3ed3f2980cb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5985,7 +5985,7 @@ const struct proc_ns_operations mntns_operations = { }; #ifdef CONFIG_SYSCTL -static struct ctl_table fs_namespace_sysctls[] = { +static const struct ctl_table fs_namespace_sysctls[] = { { .procname = "mount-max", .data = &sysctl_mount_max, diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 0eb20012792f..d3f76101ad4b 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -170,7 +170,8 @@ config ROOT_NFS config NFS_FSCACHE bool "Provide NFS client caching support" - depends on NFS_FS=m && NETFS_SUPPORT || NFS_FS=y && NETFS_SUPPORT=y + depends on NFS_FS + select NETFS_SUPPORT select FSCACHE help Say Y here if you want NFS data to be cached locally on disc through diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 7832fb0369a1..8397c43358bd 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -718,7 +718,7 @@ __be32 nfs4_callback_offload(void *data, void *dummy, copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL); if (!copy) - return htonl(NFS4ERR_SERVERFAULT); + return cpu_to_be32(NFS4ERR_DELAY); spin_lock(&cps->clp->cl_lock); rcu_read_lock(); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 550ca934c9cf..3b0918ade53c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -38,7 +38,7 @@ #include <linux/sunrpc/bc_xprt.h> #include <linux/nsproxy.h> #include <linux/pid_namespace.h> - +#include <linux/nfslocalio.h> #include "nfs4_fs.h" #include "callback.h" @@ -186,7 +186,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) seqlock_init(&clp->cl_boot_lock); ktime_get_real_ts64(&clp->cl_nfssvc_boot); nfs_uuid_init(&clp->cl_uuid); - spin_lock_init(&clp->cl_localio_lock); + INIT_WORK(&clp->cl_local_probe_work, nfs_local_probe_async_work); #endif /* CONFIG_NFS_LOCALIO */ clp->cl_principal = "*"; @@ -244,7 +244,7 @@ static void pnfs_init_server(struct nfs_server *server) */ void nfs_free_client(struct nfs_client *clp) { - nfs_local_disable(clp); + nfs_localio_disable_client(clp); /* -EIO all pending I/O */ if (!IS_ERR(clp->cl_rpcclient)) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 492cffd9d3d8..2b04038b0e40 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1672,7 +1672,7 @@ nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry, return nfs_lookup_revalidate_done(dir, dentry, inode, 1); } -static int nfs_lookup_revalidate_dentry(struct inode *dir, +static int nfs_lookup_revalidate_dentry(struct inode *dir, const struct qstr *name, struct dentry *dentry, struct inode *inode, unsigned int flags) { @@ -1690,7 +1690,7 @@ static int nfs_lookup_revalidate_dentry(struct inode *dir, goto out; dir_verifier = nfs_save_change_attribute(dir); - ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); + ret = NFS_PROTO(dir)->lookup(dir, dentry, name, fhandle, fattr); if (ret < 0) goto out; @@ -1732,8 +1732,8 @@ out: * cached dentry and do a new lookup. 
*/ static int -nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, - unsigned int flags) +nfs_do_lookup_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; int error = 0; @@ -1775,7 +1775,7 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, if (NFS_STALE(inode)) goto out_bad; - return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags); + return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags); out_valid: return nfs_lookup_revalidate_done(dir, dentry, inode, 1); out_bad: @@ -1785,38 +1785,26 @@ out_bad: } static int -__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, - int (*reval)(struct inode *, struct dentry *, unsigned int)) +__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { - struct dentry *parent; - struct inode *dir; - int ret; - if (flags & LOOKUP_RCU) { if (dentry->d_fsdata == NFS_FSDATA_BLOCKED) return -ECHILD; - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - ret = reval(dir, dentry, flags); - if (parent != READ_ONCE(dentry->d_parent)) - return -ECHILD; } else { /* Wait for unlink to complete - see unblock_revalidate() */ wait_var_event(&dentry->d_fsdata, smp_load_acquire(&dentry->d_fsdata) != NFS_FSDATA_BLOCKED); - parent = dget_parent(dentry); - ret = reval(d_inode(parent), dentry, flags); - dput(parent); } - return ret; + return 0; } -static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) +static int nfs_lookup_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); + if (__nfs_lookup_revalidate(dentry, flags)) + return -ECHILD; + return nfs_do_lookup_revalidate(dir, name, dentry, flags); } static void block_revalidate(struct dentry *dentry) @@ -1982,7 +1970,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in dir_verifier = nfs_save_change_attribute(dir); trace_nfs_lookup_enter(dir, dentry, flags); - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name, + fhandle, fattr); if (error == -ENOENT) { if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) dir_verifier = inode_peek_iversion_raw(dir); @@ -2025,7 +2014,8 @@ void nfs_d_prune_case_insensitive_aliases(struct inode *inode) EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases); #if IS_ENABLED(CONFIG_NFS_V4) -static int nfs4_lookup_revalidate(struct dentry *, unsigned int); +static int nfs4_lookup_revalidate(struct inode *, const struct qstr *, + struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, @@ -2214,11 +2204,14 @@ no_open: EXPORT_SYMBOL_GPL(nfs_atomic_open); static int -nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, - unsigned int flags) +nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; + if (__nfs_lookup_revalidate(dentry, flags)) + return -ECHILD; + trace_nfs_lookup_revalidate_enter(dir, dentry, flags); if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) @@ -2254,16 +2247,10 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, reval_dentry: if (flags & LOOKUP_RCU) return -ECHILD; - return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags); + return 
nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags); full_reval: - return nfs_do_lookup_revalidate(dir, dentry, flags); -} - -static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) -{ - return __nfs_lookup_revalidate(dentry, flags, - nfs4_do_lookup_revalidate); + return nfs_do_lookup_revalidate(dir, name, dentry, flags); } #endif /* CONFIG_NFSV4 */ @@ -2319,7 +2306,8 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, d_drop(dentry); if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name, + fhandle, fattr); if (error) goto out_error; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b08dbe96bc57..f45beea92d03 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -303,6 +303,7 @@ static void nfs_read_sync_pgio_error(struct list_head *head, int error) static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) { get_dreq(hdr->dreq); + set_bit(NFS_IOHDR_ODIRECT, &hdr->flags); } static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index f78115c6c2c1..98b45b636be3 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -164,18 +164,17 @@ decode_name(struct xdr_stream *xdr, u32 *id) } static struct nfsd_file * -ff_local_open_fh(struct nfs_client *clp, const struct cred *cred, +ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, + struct nfs_client *clp, const struct cred *cred, struct nfs_fh *fh, fmode_t mode) { - if (mode & FMODE_WRITE) { - /* - * Always request read and write access since this corresponds - * to a rw layout. - */ - mode |= FMODE_READ; - } +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); - return nfs_local_open_fh(clp, cred, fh, mode); + return nfs_local_open_fh(clp, cred, fh, &mirror->nfl, mode); +#else + return NULL; +#endif } static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, @@ -247,6 +246,7 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) spin_lock_init(&mirror->lock); refcount_set(&mirror->ref, 1); INIT_LIST_HEAD(&mirror->mirrors); + nfs_localio_file_init(&mirror->nfl); } return mirror; } @@ -257,6 +257,7 @@ static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) ff_layout_remove_mirror(mirror); kfree(mirror->fh_versions); + nfs_close_local_fh(&mirror->nfl); cred = rcu_access_pointer(mirror->ro_cred); put_cred(cred); cred = rcu_access_pointer(mirror->rw_cred); @@ -847,6 +848,9 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs4_pnfs_ds *ds; u32 ds_idx; + if (NFS_SERVER(pgio->pg_inode)->flags & + (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) + pgio->pg_maxretrans = io_maxretrans; retry: pnfs_generic_pg_check_layout(pgio, req); /* Use full layout for now */ @@ -860,6 +864,8 @@ retry: if (!pgio->pg_lseg) goto out_nolseg; } + /* Reset wb_nio, since getting layout segment was successful */ + req->wb_nio = 0; ds = ff_layout_get_ds_for_read(pgio, &ds_idx); if (!ds) { @@ -876,14 +882,24 @@ retry: pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; pgio->pg_mirror_idx = ds_idx; - - if (NFS_SERVER(pgio->pg_inode)->flags & - (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) - pgio->pg_maxretrans = io_maxretrans; return; out_nolseg: - if (pgio->pg_error < 0) - return; + if (pgio->pg_error < 0) { + if (pgio->pg_error != -EAGAIN) + return; + 
/* Retry getting layout segment if lower layer returned -EAGAIN */ + if (pgio->pg_maxretrans && req->wb_nio++ > pgio->pg_maxretrans) { + if (NFS_SERVER(pgio->pg_inode)->flags & NFS_MOUNT_SOFTERR) + pgio->pg_error = -ETIMEDOUT; + else + pgio->pg_error = -EIO; + return; + } + pgio->pg_error = 0; + /* Sleep for 1 second before retrying */ + ssleep(1); + goto retry; + } out_mds: trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode, 0, NFS4_MAX_UINT64, IOMODE_READ, @@ -1820,7 +1836,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) hdr->mds_offset = offset; /* Start IO accounting for local read */ - localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh, FMODE_READ); + localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, FMODE_READ); if (localio) { hdr->task.tk_start = ktime_get(); ff_layout_read_record_layoutstats_start(&hdr->task, hdr); @@ -1896,7 +1912,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) hdr->args.offset = offset; /* Start IO accounting for local write */ - localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh, + localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, FMODE_READ|FMODE_WRITE); if (localio) { hdr->task.tk_start = ktime_get(); @@ -1981,7 +1997,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) data->args.fh = fh; /* Start IO accounting for local commit */ - localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh, + localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, FMODE_READ|FMODE_WRITE); if (localio) { data->task.tk_start = ktime_get(); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index f84b3fb0dddd..095df09017a5 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -83,6 +83,7 @@ struct nfs4_ff_layout_mirror { nfs4_stateid stateid; const struct cred __rcu *ro_cred; const struct cred __rcu *rw_cred; + struct nfs_file_localio nfl; refcount_t ref; spinlock_t lock; unsigned long flags; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 596f35170137..1aa67fca69b2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1137,6 +1137,8 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, ctx->lock_context.open_context = ctx; INIT_LIST_HEAD(&ctx->list); ctx->mdsthreshold = NULL; + nfs_localio_file_init(&ctx->nfl); + return ctx; } EXPORT_SYMBOL_GPL(alloc_nfs_open_context); @@ -1168,6 +1170,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) nfs_sb_deactive(sb); put_rpccred(rcu_dereference_protected(ctx->ll_cred, 1)); kfree(ctx->mdsthreshold); + nfs_close_local_fh(&ctx->nfl); kfree_rcu(ctx, rcu_head); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e564bd11ba60..fae2c7ae4acc 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -455,11 +455,13 @@ extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); #if IS_ENABLED(CONFIG_NFS_LOCALIO) /* localio.c */ -extern void nfs_local_disable(struct nfs_client *); extern void nfs_local_probe(struct nfs_client *); +extern void nfs_local_probe_async(struct nfs_client *); +extern void nfs_local_probe_async_work(struct work_struct *); extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *, const struct cred *, struct nfs_fh *, + struct nfs_file_localio *, const fmode_t); extern int nfs_local_doio(struct nfs_client *, struct nfsd_file *, @@ -471,11 +473,12 @@ extern int nfs_local_commit(struct nfsd_file *, extern bool nfs_server_is_local(const struct nfs_client *clp); #else /* 
CONFIG_NFS_LOCALIO */ -static inline void nfs_local_disable(struct nfs_client *clp) {} static inline void nfs_local_probe(struct nfs_client *clp) {} +static inline void nfs_local_probe_async(struct nfs_client *clp) {} static inline struct nfsd_file * nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, - struct nfs_fh *fh, const fmode_t mode) + struct nfs_fh *fh, struct nfs_file_localio *nfl, + const fmode_t mode) { return NULL; } diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 4b8618cf114c..5c21caeae075 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -35,6 +35,7 @@ struct nfs_local_kiocb { struct bio_vec *bvec; struct nfs_pgio_header *hdr; struct work_struct work; + void (*aio_complete_work)(struct work_struct *); struct nfsd_file *localio; }; @@ -48,9 +49,14 @@ struct nfs_local_fsync_ctx { static bool localio_enabled __read_mostly = true; module_param(localio_enabled, bool, 0644); +static bool localio_O_DIRECT_semantics __read_mostly = false; +module_param(localio_O_DIRECT_semantics, bool, 0644); +MODULE_PARM_DESC(localio_O_DIRECT_semantics, + "LOCALIO will use O_DIRECT semantics to filesystem."); + static inline bool nfs_client_is_local(const struct nfs_client *clp) { - return !!test_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); + return !!rcu_access_pointer(clp->cl_uuid.net); } bool nfs_server_is_local(const struct nfs_client *clp) @@ -116,30 +122,6 @@ const struct rpc_program nfslocalio_program = { }; /* - * nfs_local_enable - enable local i/o for an nfs_client - */ -static void nfs_local_enable(struct nfs_client *clp) -{ - spin_lock(&clp->cl_localio_lock); - set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); - trace_nfs_local_enable(clp); - spin_unlock(&clp->cl_localio_lock); -} - -/* - * nfs_local_disable - disable local i/o for an nfs_client - */ -void nfs_local_disable(struct nfs_client *clp) -{ - spin_lock(&clp->cl_localio_lock); - if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) { - trace_nfs_local_disable(clp); - nfs_uuid_invalidate_one_client(&clp->cl_uuid); - } - spin_unlock(&clp->cl_localio_lock); -} - -/* * nfs_init_localioclient - Initialise an NFS localio client connection */ static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp) @@ -178,7 +160,7 @@ static bool nfs_server_uuid_is_local(struct nfs_client *clp) rpc_shutdown_client(rpcclient_localio); /* Server is only local if it initialized required struct members */ - if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom) + if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) return false; return true; @@ -194,44 +176,64 @@ void nfs_local_probe(struct nfs_client *clp) /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */ if (!localio_enabled || clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) { - nfs_local_disable(clp); + nfs_localio_disable_client(clp); return; } if (nfs_client_is_local(clp)) { /* If already enabled, disable and re-enable */ - nfs_local_disable(clp); + nfs_localio_disable_client(clp); } if (!nfs_uuid_begin(&clp->cl_uuid)) return; if (nfs_server_uuid_is_local(clp)) - nfs_local_enable(clp); + nfs_localio_enable_client(clp); nfs_uuid_end(&clp->cl_uuid); } EXPORT_SYMBOL_GPL(nfs_local_probe); +void nfs_local_probe_async_work(struct work_struct *work) +{ + struct nfs_client *clp = + container_of(work, struct nfs_client, cl_local_probe_work); + + nfs_local_probe(clp); +} + +void nfs_local_probe_async(struct nfs_client *clp) +{ + queue_work(nfsiod_workqueue, &clp->cl_local_probe_work); +} +EXPORT_SYMBOL_GPL(nfs_local_probe_async); + 
+static inline struct nfsd_file *nfs_local_file_get(struct nfsd_file *nf) +{ + return nfs_to->nfsd_file_get(nf); +} + +static inline void nfs_local_file_put(struct nfsd_file *nf) +{ + nfs_to->nfsd_file_put(nf); +} + /* - * nfs_local_open_fh - open a local filehandle in terms of nfsd_file + * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file. * - * Returns a pointer to a struct nfsd_file or NULL + * Returns a pointer to a struct nfsd_file or ERR_PTR. + * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local(). */ -struct nfsd_file * -nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, - struct nfs_fh *fh, const fmode_t mode) +static struct nfsd_file * +__nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, + struct nfs_fh *fh, struct nfs_file_localio *nfl, + const fmode_t mode) { struct nfsd_file *localio; - int status; - - if (!nfs_server_is_local(clp)) - return NULL; - if (mode & ~(FMODE_READ | FMODE_WRITE)) - return NULL; localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient, - cred, fh, mode); + cred, fh, nfl, mode); if (IS_ERR(localio)) { - status = PTR_ERR(localio); + int status = PTR_ERR(localio); trace_nfs_local_open_fh(fh, mode, status); switch (status) { case -ENOMEM: @@ -240,10 +242,59 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, /* Revalidate localio, will disable if unsupported */ nfs_local_probe(clp); } - return NULL; } return localio; } + +/* + * nfs_local_open_fh - open a local filehandle in terms of nfsd_file. + * First checking if the open nfsd_file is already cached, otherwise + * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio. + * + * Returns a pointer to a struct nfsd_file or NULL. + */ +struct nfsd_file * +nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, + struct nfs_fh *fh, struct nfs_file_localio *nfl, + const fmode_t mode) +{ + struct nfsd_file *nf, *new, __rcu **pnf; + + if (!nfs_server_is_local(clp)) + return NULL; + if (mode & ~(FMODE_READ | FMODE_WRITE)) + return NULL; + + if (mode & FMODE_WRITE) + pnf = &nfl->rw_file; + else + pnf = &nfl->ro_file; + + new = NULL; + rcu_read_lock(); + nf = rcu_dereference(*pnf); + if (!nf) { + rcu_read_unlock(); + new = __nfs_local_open_fh(clp, cred, fh, nfl, mode); + if (IS_ERR(new)) + return NULL; + /* try to swap in the pointer */ + spin_lock(&clp->cl_uuid.lock); + nf = rcu_dereference_protected(*pnf, 1); + if (!nf) { + nf = new; + new = NULL; + rcu_assign_pointer(*pnf, nf); + } + spin_unlock(&clp->cl_uuid.lock); + rcu_read_lock(); + } + nf = nfs_local_file_get(nf); + rcu_read_unlock(); + if (new) + nfs_to_nfsd_file_put_local(new); + return nf; +} EXPORT_SYMBOL_GPL(nfs_local_open_fh); static struct bio_vec * @@ -285,10 +336,19 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr, kfree(iocb); return NULL; } - init_sync_kiocb(&iocb->kiocb, file); + + if (localio_O_DIRECT_semantics && + test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) { + iocb->kiocb.ki_filp = file; + iocb->kiocb.ki_flags = IOCB_DIRECT; + } else + init_sync_kiocb(&iocb->kiocb, file); + iocb->kiocb.ki_pos = hdr->args.offset; iocb->hdr = hdr; iocb->kiocb.ki_flags &= ~IOCB_APPEND; + iocb->aio_complete_work = NULL; + return iocb; } @@ -328,7 +388,7 @@ nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status) hdr->res.op_status = NFS4_OK; hdr->task.tk_status = 0; } else { - hdr->res.op_status = nfs4_stat_to_errno(status); + hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status); hdr->task.tk_status = status; } } @@ -338,11 +398,23 
@@ nfs_local_pgio_release(struct nfs_local_kiocb *iocb) { struct nfs_pgio_header *hdr = iocb->hdr; - nfs_to_nfsd_file_put_local(iocb->localio); + nfs_local_file_put(iocb->localio); nfs_local_iocb_free(iocb); nfs_local_hdr_release(hdr, hdr->task.tk_ops); } +/* + * Complete the I/O from iocb->kiocb.ki_complete() + * + * Note that this function can be called from a bottom half context, + * hence we need to queue the rpc_call_done() etc to a workqueue + */ +static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb) +{ + INIT_WORK(&iocb->work, iocb->aio_complete_work); + queue_work(nfsiod_workqueue, &iocb->work); +} + static void nfs_local_read_done(struct nfs_local_kiocb *iocb, long status) { @@ -365,6 +437,23 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status) status > 0 ? status : 0, hdr->res.eof); } +static void nfs_local_read_aio_complete_work(struct work_struct *work) +{ + struct nfs_local_kiocb *iocb = + container_of(work, struct nfs_local_kiocb, work); + + nfs_local_pgio_release(iocb); +} + +static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret) +{ + struct nfs_local_kiocb *iocb = + container_of(kiocb, struct nfs_local_kiocb, kiocb); + + nfs_local_read_done(iocb, ret); + nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */ +} + static void nfs_local_call_read(struct work_struct *work) { struct nfs_local_kiocb *iocb = @@ -379,10 +468,10 @@ static void nfs_local_call_read(struct work_struct *work) nfs_local_iter_init(&iter, iocb, READ); status = filp->f_op->read_iter(&iocb->kiocb, &iter); - WARN_ON_ONCE(status == -EIOCBQUEUED); - - nfs_local_read_done(iocb, status); - nfs_local_pgio_release(iocb); + if (status != -EIOCBQUEUED) { + nfs_local_read_done(iocb, status); + nfs_local_pgio_release(iocb); + } revert_creds(save_cred); } @@ -410,6 +499,11 @@ nfs_do_local_read(struct nfs_pgio_header *hdr, nfs_local_pgio_init(hdr, call_ops); hdr->res.eof = false; + if (iocb->kiocb.ki_flags & IOCB_DIRECT) { + iocb->kiocb.ki_complete = nfs_local_read_aio_complete; + iocb->aio_complete_work = nfs_local_read_aio_complete_work; + } + INIT_WORK(&iocb->work, nfs_local_call_read); queue_work(nfslocaliod_workqueue, &iocb->work); @@ -534,6 +628,24 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status) nfs_local_pgio_done(hdr, status); } +static void nfs_local_write_aio_complete_work(struct work_struct *work) +{ + struct nfs_local_kiocb *iocb = + container_of(work, struct nfs_local_kiocb, work); + + nfs_local_vfs_getattr(iocb); + nfs_local_pgio_release(iocb); +} + +static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret) +{ + struct nfs_local_kiocb *iocb = + container_of(kiocb, struct nfs_local_kiocb, kiocb); + + nfs_local_write_done(iocb, ret); + nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */ +} + static void nfs_local_call_write(struct work_struct *work) { struct nfs_local_kiocb *iocb = @@ -552,11 +664,11 @@ static void nfs_local_call_write(struct work_struct *work) file_start_write(filp); status = filp->f_op->write_iter(&iocb->kiocb, &iter); file_end_write(filp); - WARN_ON_ONCE(status == -EIOCBQUEUED); - - nfs_local_write_done(iocb, status); - nfs_local_vfs_getattr(iocb); - nfs_local_pgio_release(iocb); + if (status != -EIOCBQUEUED) { + nfs_local_write_done(iocb, status); + nfs_local_vfs_getattr(iocb); + nfs_local_pgio_release(iocb); + } revert_creds(save_cred); current->flags = old_flags; @@ -592,10 +704,16 @@ nfs_do_local_write(struct nfs_pgio_header *hdr, case 
NFS_FILE_SYNC: iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC; } + nfs_local_pgio_init(hdr, call_ops); nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable); + if (iocb->kiocb.ki_flags & IOCB_DIRECT) { + iocb->kiocb.ki_complete = nfs_local_write_aio_complete; + iocb->aio_complete_work = nfs_local_write_aio_complete_work; + } + INIT_WORK(&iocb->work, nfs_local_call_write); queue_work(nfslocaliod_workqueue, &iocb->work); @@ -626,8 +744,8 @@ int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio, if (status != 0) { if (status == -EAGAIN) - nfs_local_disable(clp); - nfs_to_nfsd_file_put_local(localio); + nfs_localio_disable_client(clp); + nfs_local_file_put(localio); hdr->task.tk_status = status; nfs_local_hdr_release(hdr, call_ops); } @@ -668,7 +786,7 @@ nfs_local_commit_done(struct nfs_commit_data *data, int status) data->task.tk_status = 0; } else { nfs_reset_boot_verifier(data->inode); - data->res.op_status = nfs4_stat_to_errno(status); + data->res.op_status = nfs_localio_errno_to_nfs4_stat(status); data->task.tk_status = status; } } @@ -678,7 +796,7 @@ nfs_local_release_commit_data(struct nfsd_file *localio, struct nfs_commit_data *data, const struct rpc_call_ops *call_ops) { - nfs_to_nfsd_file_put_local(localio); + nfs_local_file_put(localio); call_ops->rpc_call_done(&data->task, data); call_ops->rpc_release(data); } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 2d53574da605..973aed9cc5fe 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -308,7 +308,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server) int err; /* Look it up again to get its attributes */ - err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, + err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, &dentry->d_name, ctx->mntfh, ctx->clone_data.fattr); dput(parent); if (err != 0) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 1566163c6d85..0c3bc98cd999 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -192,7 +192,7 @@ __nfs3_proc_lookup(struct inode *dir, const char *name, size_t len, } static int -nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, +nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { unsigned short task_flags = 0; @@ -202,8 +202,7 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, task_flags |= RPC_TASK_TIMEOUT; dprintk("NFS call lookup %pd2\n", dentry); - return __nfs3_proc_lookup(dir, dentry->d_name.name, - dentry->d_name.len, fhandle, fattr, + return __nfs3_proc_lookup(dir, name->name, name->len, fhandle, fattr, task_flags); } @@ -844,6 +843,41 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return status; } +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + +static unsigned nfs3_localio_probe_throttle __read_mostly = 0; +module_param(nfs3_localio_probe_throttle, uint, 0644); +MODULE_PARM_DESC(nfs3_localio_probe_throttle, + "Probe for NFSv3 LOCALIO every N IO requests. Must be power-of-2, defaults to 0 (probing disabled)."); + +static void nfs3_localio_probe(struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + + /* Throttled to reduce nfs_local_probe_async() frequency */ + if (!nfs3_localio_probe_throttle || nfs_server_is_local(clp)) + return; + + /* + * Try (re)enabling LOCALIO if isn't enabled -- admin deems + * it worthwhile to periodically check if LOCALIO possible by + * setting the 'nfs3_localio_probe_throttle' module parameter. 
+ * + * This is useful if LOCALIO was previously enabled, but was + * disabled due to server restart, and IO has successfully + * completed in terms of normal RPC. + */ + if ((clp->cl_uuid.nfs3_localio_probe_count++ & + (nfs3_localio_probe_throttle - 1)) == 0) { + if (!nfs_server_is_local(clp)) + nfs_local_probe_async(clp); + } +} + +#else +static void nfs3_localio_probe(struct nfs_server *server) {} +#endif + static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { struct inode *inode = hdr->inode; @@ -855,8 +889,11 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; - if (task->tk_status >= 0 && !server->read_hdrsize) - cmpxchg(&server->read_hdrsize, 0, hdr->res.replen); + if (task->tk_status >= 0) { + if (!server->read_hdrsize) + cmpxchg(&server->read_hdrsize, 0, hdr->res.replen); + nfs3_localio_probe(server); + } nfs_invalidate_atime(inode); nfs_refresh_inode(inode, &hdr->fattr); @@ -886,8 +923,10 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; - if (task->tk_status >= 0) + if (task->tk_status >= 0) { nfs_writeback_update_inode(hdr); + nfs3_localio_probe(NFS_SERVER(inode)); + } return 0; } diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 531c9c20ef1d..1924c4a2077b 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -498,15 +498,15 @@ out_put_src_lock: return err; } -struct nfs42_offloadcancel_data { +struct nfs42_offload_data { struct nfs_server *seq_server; struct nfs42_offload_status_args args; struct nfs42_offload_status_res res; }; -static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata) +static void nfs42_offload_prepare(struct rpc_task *task, void *calldata) { - struct nfs42_offloadcancel_data *data = calldata; + struct nfs42_offload_data *data = calldata; nfs4_setup_sequence(data->seq_server->nfs_client, &data->args.osa_seq_args, @@ -515,7 +515,7 @@ static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata) static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata) { - struct nfs42_offloadcancel_data *data = calldata; + struct nfs42_offload_data *data = calldata; trace_nfs4_offload_cancel(&data->args, task->tk_status); nfs41_sequence_done(task, &data->res.osr_seq_res); @@ -525,22 +525,22 @@ static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata) rpc_restart_call_prepare(task); } -static void nfs42_free_offloadcancel_data(void *data) +static void nfs42_offload_release(void *data) { kfree(data); } static const struct rpc_call_ops nfs42_offload_cancel_ops = { - .rpc_call_prepare = nfs42_offload_cancel_prepare, + .rpc_call_prepare = nfs42_offload_prepare, .rpc_call_done = nfs42_offload_cancel_done, - .rpc_release = nfs42_free_offloadcancel_data, + .rpc_release = nfs42_offload_release, }; static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *stateid) { struct nfs_server *dst_server = NFS_SERVER(file_inode(dst)); - struct nfs42_offloadcancel_data *data = NULL; + struct nfs42_offload_data *data = NULL; struct nfs_open_context *ctx = nfs_file_open_context(dst); struct rpc_task *task; struct rpc_message msg = { @@ -552,14 +552,14 @@ static int nfs42_do_offload_cancel_async(struct file *dst, .rpc_message = &msg, .callback_ops = &nfs42_offload_cancel_ops, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE, }; int 
status; if (!(dst_server->caps & NFS_CAP_OFFLOAD_CANCEL)) return -EOPNOTSUPP; - data = kzalloc(sizeof(struct nfs42_offloadcancel_data), GFP_KERNEL); + data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL); if (data == NULL) return -ENOMEM; @@ -861,7 +861,7 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, .rpc_message = &msg, .callback_ops = &nfs42_layoutstat_ops, .callback_data = data, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE, }; struct rpc_task *task; @@ -1016,7 +1016,7 @@ int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg, struct rpc_task_setup task_setup = { .rpc_message = &msg, .callback_ops = &nfs42_layouterror_ops, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE, }; unsigned int i; diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 9e3ae53e2205..5072d7ea72e9 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -144,9 +144,11 @@ decode_putfh_maxsz + \ decode_offload_cancel_maxsz) #define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_copy_notify_maxsz) #define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_copy_notify_maxsz) #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ @@ -549,7 +551,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req, } /* - * Encode OFFLOAD_CANEL request + * Encode OFFLOAD_CANCEL request */ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req, struct xdr_stream *xdr, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d615d520f8cf..df9669d4ded7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4544,15 +4544,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, - struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct dentry *dentry, const struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); int status; struct nfs4_lookup_arg args = { .bitmask = server->attr_bitmask, .dir_fh = NFS_FH(dir), - .name = &dentry->d_name, + .name = name, }; struct nfs4_lookup_res res = { .server = server, @@ -4594,17 +4594,16 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) } static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, - struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct dentry *dentry, const struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs4_exception exception = { .interruptible = true, }; struct rpc_clnt *client = *clnt; - const struct qstr *name = &dentry->d_name; int err; do { - err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr); + err = _nfs4_proc_lookup(client, dir, dentry, name, fhandle, fattr); trace_nfs4_lookup(dir, name, err); switch (err) { case -NFS4ERR_BADNAME: @@ -4639,13 +4638,13 @@ out: return err; } -static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, +static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int status; struct rpc_clnt *client = NFS_CLIENT(dir); - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); + status = nfs4_proc_lookup_common(&client, dir, dentry, name, fhandle, fattr); if (client != NFS_CLIENT(dir)) { rpc_shutdown_client(client); 
nfs_fixup_secinfo_attributes(fattr); @@ -4660,7 +4659,8 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry, struct rpc_clnt *client = NFS_CLIENT(dir); int status; - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); + status = nfs4_proc_lookup_common(&client, dir, dentry, &dentry->d_name, + fhandle, fattr); if (status < 0) return ERR_PTR(status); return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9a9f60a2291b..542cdf71229f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1955,6 +1955,7 @@ restart: } rcu_read_unlock(); nfs4_free_state_owners(&freeme); + nfs_local_probe_async(clp); if (lost_locks) pr_warn("NFS: %s: lost %d locks\n", clp->cl_hostname, lost_locks); diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index 886a7c4c60b3..d1a92d8f8ba4 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c @@ -17,7 +17,7 @@ static const int nfs_set_port_min; static const int nfs_set_port_max = 65535; static struct ctl_table_header *nfs4_callback_sysctl_table; -static struct ctl_table nfs4_cb_sysctls[] = { +static const struct ctl_table nfs4_cb_sysctls[] = { { .procname = "nfs_callback_tcpport", .data = &nfs_callback_set_tcpport, diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 1eab98c277fa..7a058bd8c566 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -1714,38 +1714,6 @@ TRACE_EVENT(nfs_local_open_fh, ) ); -DECLARE_EVENT_CLASS(nfs_local_client_event, - TP_PROTO( - const struct nfs_client *clp - ), - - TP_ARGS(clp), - - TP_STRUCT__entry( - __field(unsigned int, protocol) - __string(server, clp->cl_hostname) - ), - - TP_fast_assign( - __entry->protocol = clp->rpc_ops->version; - __assign_str(server); - ), - - TP_printk( - "server=%s NFSv%u", __get_str(server), __entry->protocol - ) -); - -#define DEFINE_NFS_LOCAL_CLIENT_EVENT(name) \ - DEFINE_EVENT(nfs_local_client_event, name, \ - TP_PROTO( \ - const struct nfs_client *clp \ - ), \ - TP_ARGS(clp)) - -DEFINE_NFS_LOCAL_CLIENT_EVENT(nfs_local_enable); -DEFINE_NFS_LOCAL_CLIENT_EVENT(nfs_local_disable); - DECLARE_EVENT_CLASS(nfs_xdr_event, TP_PROTO( const struct xdr_stream *xdr, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e27c07bd8929..11968dcb7243 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -961,8 +961,9 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) struct nfs_client *clp = NFS_SERVER(hdr->inode)->nfs_client; struct nfsd_file *localio = - nfs_local_open_fh(clp, hdr->cred, - hdr->args.fh, hdr->args.context->mode); + nfs_local_open_fh(clp, hdr->cred, hdr->args.fh, + &hdr->args.context->nfl, + hdr->args.context->mode); if (NFS_SERVER(hdr->inode)->nfs_client->cl_minorversion) task_flags = RPC_TASK_MOVEABLE; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 6c09cd090c34..77920a2e3cef 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -153,13 +153,13 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs_proc_lookup(struct inode *dir, struct dentry *dentry, +nfs_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_diropargs arg = { .fh = NFS_FH(dir), - .name = dentry->d_name.name, - .len = dentry->d_name.len + .name = name->name, + .len = name->len }; struct nfs_diropok res = { .fh = fhandle, diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index e645be1a3381..f579df0e8d67 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -14,7 +14,7 
@@ static struct ctl_table_header *nfs_callback_sysctl_table; -static struct ctl_table nfs_cb_sysctls[] = { +static const struct ctl_table nfs_cb_sysctls[] = { { .procname = "nfs_mountpoint_timeout", .data = &nfs_mountpoint_expiry_timeout, diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index bf378ecd5d9f..7b59a40d40c0 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -280,9 +280,9 @@ void nfs_sysfs_link_rpc_client(struct nfs_server *server, char name[RPC_CLIENT_NAME_SIZE]; int ret; - strcpy(name, clnt->cl_program->name); - strcat(name, uniq ? uniq : ""); - strcat(name, "_client"); + strscpy(name, clnt->cl_program->name, sizeof(name)); + strncat(name, uniq ? uniq : "", sizeof(name) - strlen(name) - 1); + strncat(name, "_client", sizeof(name) - strlen(name) - 1); ret = sysfs_create_link_nowarn(&server->kobj, &clnt->cl_sysfs->kobject, name); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 50fa539611f5..aa3d8bea3ec0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1826,7 +1826,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, task_flags = RPC_TASK_MOVEABLE; localio = nfs_local_open_fh(NFS_SERVER(inode)->nfs_client, data->cred, - data->args.fh, data->context->mode); + data->args.fh, &data->context->nfl, + data->context->mode); return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), data->mds_ops, how, RPC_TASK_CRED_NOREF | task_flags, localio); diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile index a5e54809701e..c10ead273ff2 100644 --- a/fs/nfs_common/Makefile +++ b/fs/nfs_common/Makefile @@ -6,8 +6,9 @@ obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o nfs_acl-objs := nfsacl.o +CFLAGS_localio_trace.o += -I$(src) obj-$(CONFIG_NFS_COMMON_LOCALIO_SUPPORT) += nfs_localio.o -nfs_localio-objs := nfslocalio.o +nfs_localio-objs := nfslocalio.o localio_trace.o obj-$(CONFIG_GRACE_PERIOD) += grace.o obj-$(CONFIG_NFS_V4_2_SSC_HELPER) += nfs_ssc.o diff --git a/fs/nfs_common/common.c b/fs/nfs_common/common.c index 34a115176f97..af09aed09fd2 100644 --- a/fs/nfs_common/common.c +++ b/fs/nfs_common/common.c @@ -15,7 +15,7 @@ static const struct { { NFS_OK, 0 }, { NFSERR_PERM, -EPERM }, { NFSERR_NOENT, -ENOENT }, - { NFSERR_IO, -errno_NFSERR_IO}, + { NFSERR_IO, -EIO }, { NFSERR_NXIO, -ENXIO }, /* { NFSERR_EAGAIN, -EAGAIN }, */ { NFSERR_ACCES, -EACCES }, @@ -45,7 +45,6 @@ static const struct { { NFSERR_SERVERFAULT, -EREMOTEIO }, { NFSERR_BADTYPE, -EBADTYPE }, { NFSERR_JUKEBOX, -EJUKEBOX }, - { -1, -EIO } }; /** @@ -59,26 +58,29 @@ int nfs_stat_to_errno(enum nfs_stat status) { int i; - for (i = 0; nfs_errtbl[i].stat != -1; i++) { + for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { if (nfs_errtbl[i].stat == (int)status) return nfs_errtbl[i].errno; } - return nfs_errtbl[i].errno; + return -EIO; } EXPORT_SYMBOL_GPL(nfs_stat_to_errno); /* * We need to translate between nfs v4 status return values and * the local errno values which may not be the same. + * + * nfs4_errtbl_common[] is used before more specialized mappings + * available in nfs4_errtbl[] or nfs4_errtbl_localio[]. 
*/ static const struct { int stat; int errno; -} nfs4_errtbl[] = { +} nfs4_errtbl_common[] = { { NFS4_OK, 0 }, { NFS4ERR_PERM, -EPERM }, { NFS4ERR_NOENT, -ENOENT }, - { NFS4ERR_IO, -errno_NFSERR_IO}, + { NFS4ERR_IO, -EIO }, { NFS4ERR_NXIO, -ENXIO }, { NFS4ERR_ACCESS, -EACCES }, { NFS4ERR_EXIST, -EEXIST }, @@ -98,15 +100,20 @@ static const struct { { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, { NFS4ERR_NOTSUPP, -ENOTSUPP }, { NFS4ERR_TOOSMALL, -ETOOSMALL }, - { NFS4ERR_SERVERFAULT, -EREMOTEIO }, { NFS4ERR_BADTYPE, -EBADTYPE }, - { NFS4ERR_LOCKED, -EAGAIN }, { NFS4ERR_SYMLINK, -ELOOP }, - { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, { NFS4ERR_DEADLOCK, -EDEADLK }, +}; + +static const struct { + int stat; + int errno; +} nfs4_errtbl[] = { + { NFS4ERR_SERVERFAULT, -EREMOTEIO }, + { NFS4ERR_LOCKED, -EAGAIN }, + { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, { NFS4ERR_NOXATTR, -ENODATA }, { NFS4ERR_XATTR2BIG, -E2BIG }, - { -1, -EIO } }; /* @@ -116,7 +123,14 @@ static const struct { int nfs4_stat_to_errno(int stat) { int i; - for (i = 0; nfs4_errtbl[i].stat != -1; i++) { + + /* First check nfs4_errtbl_common */ + for (i = 0; i < ARRAY_SIZE(nfs4_errtbl_common); i++) { + if (nfs4_errtbl_common[i].stat == stat) + return nfs4_errtbl_common[i].errno; + } + /* Then check nfs4_errtbl */ + for (i = 0; i < ARRAY_SIZE(nfs4_errtbl); i++) { if (nfs4_errtbl[i].stat == stat) return nfs4_errtbl[i].errno; } @@ -132,3 +146,56 @@ int nfs4_stat_to_errno(int stat) return -stat; } EXPORT_SYMBOL_GPL(nfs4_stat_to_errno); + +/* + * This table is useful for conversion from local errno to NFS error. + * It provides more logically correct mappings for use with LOCALIO + * (which is focused on converting from errno to NFS status). + */ +static const struct { + int stat; + int errno; +} nfs4_errtbl_localio[] = { + /* Map errors differently than nfs4_errtbl */ + { NFS4ERR_IO, -EREMOTEIO }, + { NFS4ERR_DELAY, -EAGAIN }, + { NFS4ERR_FBIG, -E2BIG }, + /* Map errors not handled by nfs4_errtbl */ + { NFS4ERR_STALE, -EBADF }, + { NFS4ERR_STALE, -EOPENSTALE }, + { NFS4ERR_DELAY, -ETIMEDOUT }, + { NFS4ERR_DELAY, -ERESTARTSYS }, + { NFS4ERR_DELAY, -ENOMEM }, + { NFS4ERR_IO, -ETXTBSY }, + { NFS4ERR_IO, -EBUSY }, + { NFS4ERR_SERVERFAULT, -ESERVERFAULT }, + { NFS4ERR_SERVERFAULT, -ENFILE }, + { NFS4ERR_IO, -EUCLEAN }, + { NFS4ERR_PERM, -ENOKEY }, +}; + +/* + * Convert an errno to an NFS error code for LOCALIO. + */ +__u32 nfs_localio_errno_to_nfs4_stat(int errno) +{ + int i; + + /* First check nfs4_errtbl_common */ + for (i = 0; i < ARRAY_SIZE(nfs4_errtbl_common); i++) { + if (nfs4_errtbl_common[i].errno == errno) + return nfs4_errtbl_common[i].stat; + } + /* Then check nfs4_errtbl_localio */ + for (i = 0; i < ARRAY_SIZE(nfs4_errtbl_localio); i++) { + if (nfs4_errtbl_localio[i].errno == errno) + return nfs4_errtbl_localio[i].stat; + } + /* If we cannot translate the error, the recovery routines should + * handle it. + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. 
+ */ + return NFS4ERR_SERVERFAULT; +} +EXPORT_SYMBOL_GPL(nfs_localio_errno_to_nfs4_stat); diff --git a/fs/nfs_common/localio_trace.c b/fs/nfs_common/localio_trace.c new file mode 100644 index 000000000000..7decfe57abeb --- /dev/null +++ b/fs/nfs_common/localio_trace.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 Trond Myklebust <trond.myklebust@hammerspace.com> + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + */ +#include <linux/nfs_fs.h> +#include <linux/namei.h> + +#define CREATE_TRACE_POINTS +#include "localio_trace.h" diff --git a/fs/nfs_common/localio_trace.h b/fs/nfs_common/localio_trace.h new file mode 100644 index 000000000000..4055aec9ff8d --- /dev/null +++ b/fs/nfs_common/localio_trace.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Trond Myklebust <trond.myklebust@hammerspace.com> + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nfs_localio + +#if !defined(_TRACE_NFS_COMMON_LOCALIO_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NFS_COMMON_LOCALIO_H + +#include <linux/tracepoint.h> + +#include <trace/misc/fs.h> +#include <trace/misc/nfs.h> +#include <trace/misc/sunrpc.h> + +DECLARE_EVENT_CLASS(nfs_local_client_event, + TP_PROTO( + const struct nfs_client *clp + ), + + TP_ARGS(clp), + + TP_STRUCT__entry( + __field(unsigned int, protocol) + __string(server, clp->cl_hostname) + ), + + TP_fast_assign( + __entry->protocol = clp->rpc_ops->version; + __assign_str(server); + ), + + TP_printk( + "server=%s NFSv%u", __get_str(server), __entry->protocol + ) +); + +#define DEFINE_NFS_LOCAL_CLIENT_EVENT(name) \ + DEFINE_EVENT(nfs_local_client_event, name, \ + TP_PROTO( \ + const struct nfs_client *clp \ + ), \ + TP_ARGS(clp)) + +DEFINE_NFS_LOCAL_CLIENT_EVENT(nfs_localio_enable_client); +DEFINE_NFS_LOCAL_CLIENT_EVENT(nfs_localio_disable_client); + +#endif /* _TRACE_NFS_COMMON_LOCALIO_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE localio_trace +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index a74ec08f6c96..6a0bdea6d644 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -7,38 +7,67 @@ #include <linux/module.h> #include <linux/list.h> #include <linux/nfslocalio.h> +#include <linux/nfs3.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> #include <net/netns/generic.h> +#include "localio_trace.h" + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("NFS localio protocol bypass support"); -static DEFINE_SPINLOCK(nfs_uuid_lock); +static DEFINE_SPINLOCK(nfs_uuids_lock); /* * Global list of nfs_uuid_t instances - * that is protected by nfs_uuid_lock. + * that is protected by nfs_uuids_lock. */ static LIST_HEAD(nfs_uuids); +/* + * Lock ordering: + * 1: nfs_uuid->lock + * 2: nfs_uuids_lock + * 3: nfs_uuid->list_lock (aka nn->local_clients_lock) + * + * May skip locks in select cases, but never hold multiple + * locks out of order. + */ + void nfs_uuid_init(nfs_uuid_t *nfs_uuid) { - nfs_uuid->net = NULL; + RCU_INIT_POINTER(nfs_uuid->net, NULL); nfs_uuid->dom = NULL; + nfs_uuid->list_lock = NULL; INIT_LIST_HEAD(&nfs_uuid->list); + INIT_LIST_HEAD(&nfs_uuid->files); + spin_lock_init(&nfs_uuid->lock); + nfs_uuid->nfs3_localio_probe_count = 0; } EXPORT_SYMBOL_GPL(nfs_uuid_init); bool nfs_uuid_begin(nfs_uuid_t *nfs_uuid) { - spin_lock(&nfs_uuid_lock); - /* Is this nfs_uuid already in use? 
*/ + spin_lock(&nfs_uuid->lock); + if (rcu_access_pointer(nfs_uuid->net)) { + /* This nfs_uuid is already in use */ + spin_unlock(&nfs_uuid->lock); + return false; + } + + spin_lock(&nfs_uuids_lock); if (!list_empty(&nfs_uuid->list)) { - spin_unlock(&nfs_uuid_lock); + /* This nfs_uuid is already in use */ + spin_unlock(&nfs_uuids_lock); + spin_unlock(&nfs_uuid->lock); return false; } - uuid_gen(&nfs_uuid->uuid); list_add_tail(&nfs_uuid->list, &nfs_uuids); - spin_unlock(&nfs_uuid_lock); + spin_unlock(&nfs_uuids_lock); + + uuid_gen(&nfs_uuid->uuid); + spin_unlock(&nfs_uuid->lock); return true; } @@ -46,12 +75,16 @@ EXPORT_SYMBOL_GPL(nfs_uuid_begin); void nfs_uuid_end(nfs_uuid_t *nfs_uuid) { - if (nfs_uuid->net == NULL) { - spin_lock(&nfs_uuid_lock); - if (nfs_uuid->net == NULL) + if (!rcu_access_pointer(nfs_uuid->net)) { + spin_lock(&nfs_uuid->lock); + if (!rcu_access_pointer(nfs_uuid->net)) { + /* Not local, remove from nfs_uuids */ + spin_lock(&nfs_uuids_lock); list_del_init(&nfs_uuid->list); - spin_unlock(&nfs_uuid_lock); - } + spin_unlock(&nfs_uuids_lock); + } + spin_unlock(&nfs_uuid->lock); + } } EXPORT_SYMBOL_GPL(nfs_uuid_end); @@ -69,68 +102,142 @@ static nfs_uuid_t * nfs_uuid_lookup_locked(const uuid_t *uuid) static struct module *nfsd_mod; void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list, - struct net *net, struct auth_domain *dom, - struct module *mod) + spinlock_t *list_lock, struct net *net, + struct auth_domain *dom, struct module *mod) { nfs_uuid_t *nfs_uuid; - spin_lock(&nfs_uuid_lock); + spin_lock(&nfs_uuids_lock); nfs_uuid = nfs_uuid_lookup_locked(uuid); - if (nfs_uuid) { - kref_get(&dom->ref); - nfs_uuid->dom = dom; - /* - * We don't hold a ref on the net, but instead put - * ourselves on a list so the net pointer can be - * invalidated. - */ - list_move(&nfs_uuid->list, list); - rcu_assign_pointer(nfs_uuid->net, net); - - __module_get(mod); - nfsd_mod = mod; + if (!nfs_uuid) { + spin_unlock(&nfs_uuids_lock); + return; } - spin_unlock(&nfs_uuid_lock); + + /* + * We don't hold a ref on the net, but instead put + * ourselves on @list (nn->local_clients) so the net + * pointer can be invalidated. + */ + spin_lock(list_lock); /* list_lock is nn->local_clients_lock */ + list_move(&nfs_uuid->list, list); + spin_unlock(list_lock); + + spin_unlock(&nfs_uuids_lock); + /* Once nfs_uuid is parented to @list, avoid global nfs_uuids_lock */ + spin_lock(&nfs_uuid->lock); + + __module_get(mod); + nfsd_mod = mod; + + nfs_uuid->list_lock = list_lock; + kref_get(&dom->ref); + nfs_uuid->dom = dom; + rcu_assign_pointer(nfs_uuid->net, net); + spin_unlock(&nfs_uuid->lock); } EXPORT_SYMBOL_GPL(nfs_uuid_is_local); -static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid) +void nfs_localio_enable_client(struct nfs_client *clp) +{ + /* nfs_uuid_is_local() does the actual enablement */ + trace_nfs_localio_enable_client(clp); +} +EXPORT_SYMBOL_GPL(nfs_localio_enable_client); + +/* + * Cleanup the nfs_uuid_t embedded in an nfs_client. + * This is the long-form of nfs_uuid_init(). 
+ */ +static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) { - if (nfs_uuid->net) { - module_put(nfsd_mod); - nfs_uuid->net = NULL; + LIST_HEAD(local_files); + struct nfs_file_localio *nfl, *tmp; + + spin_lock(&nfs_uuid->lock); + if (unlikely(!rcu_access_pointer(nfs_uuid->net))) { + spin_unlock(&nfs_uuid->lock); + return false; } + RCU_INIT_POINTER(nfs_uuid->net, NULL); + if (nfs_uuid->dom) { auth_domain_put(nfs_uuid->dom); nfs_uuid->dom = NULL; } - list_del_init(&nfs_uuid->list); + + list_splice_init(&nfs_uuid->files, &local_files); + spin_unlock(&nfs_uuid->lock); + + /* Walk list of files and ensure their last references dropped */ + list_for_each_entry_safe(nfl, tmp, &local_files, list) { + nfs_close_local_fh(nfl); + cond_resched(); + } + + spin_lock(&nfs_uuid->lock); + BUG_ON(!list_empty(&nfs_uuid->files)); + + /* Remove client from nn->local_clients */ + if (nfs_uuid->list_lock) { + spin_lock(nfs_uuid->list_lock); + BUG_ON(list_empty(&nfs_uuid->list)); + list_del_init(&nfs_uuid->list); + spin_unlock(nfs_uuid->list_lock); + nfs_uuid->list_lock = NULL; + } + + module_put(nfsd_mod); + spin_unlock(&nfs_uuid->lock); + + return true; } -void nfs_uuid_invalidate_clients(struct list_head *list) +void nfs_localio_disable_client(struct nfs_client *clp) { + if (nfs_uuid_put(&clp->cl_uuid)) + trace_nfs_localio_disable_client(clp); +} +EXPORT_SYMBOL_GPL(nfs_localio_disable_client); + +void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, + spinlock_t *nn_local_clients_lock) +{ + LIST_HEAD(local_clients); nfs_uuid_t *nfs_uuid, *tmp; + struct nfs_client *clp; - spin_lock(&nfs_uuid_lock); - list_for_each_entry_safe(nfs_uuid, tmp, list, list) - nfs_uuid_put_locked(nfs_uuid); - spin_unlock(&nfs_uuid_lock); + spin_lock(nn_local_clients_lock); + list_splice_init(nn_local_clients, &local_clients); + spin_unlock(nn_local_clients_lock); + list_for_each_entry_safe(nfs_uuid, tmp, &local_clients, list) { + if (WARN_ON(nfs_uuid->list_lock != nn_local_clients_lock)) + break; + clp = container_of(nfs_uuid, struct nfs_client, cl_uuid); + nfs_localio_disable_client(clp); + } } -EXPORT_SYMBOL_GPL(nfs_uuid_invalidate_clients); +EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients); -void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid) +static void nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl) { - if (nfs_uuid->net) { - spin_lock(&nfs_uuid_lock); - nfs_uuid_put_locked(nfs_uuid); - spin_unlock(&nfs_uuid_lock); + /* Add nfl to nfs_uuid->files if it isn't already */ + spin_lock(&nfs_uuid->lock); + if (list_empty(&nfl->list)) { + rcu_assign_pointer(nfl->nfs_uuid, nfs_uuid); + list_add_tail(&nfl->list, &nfs_uuid->files); } + spin_unlock(&nfs_uuid->lock); } -EXPORT_SYMBOL_GPL(nfs_uuid_invalidate_one_client); +/* + * Caller is responsible for calling nfsd_net_put and + * nfsd_file_put (via nfs_to_nfsd_file_put_local). + */ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, struct rpc_clnt *rpc_clnt, const struct cred *cred, - const struct nfs_fh *nfs_fh, const fmode_t fmode) + const struct nfs_fh *nfs_fh, struct nfs_file_localio *nfl, + const fmode_t fmode) { struct net *net; struct nfsd_file *localio; @@ -139,7 +246,7 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, * Not running in nfsd context, so must safely get reference on nfsd_serv. * But the server may already be shutting down, if so disallow new localio. 
* uuid->net is NOT a counted reference, but rcu_read_lock() ensures that - * if uuid->net is not NULL, then calling nfsd_serv_try_get() is safe + * if uuid->net is not NULL, then calling nfsd_net_try_get() is safe * and if it succeeds we will have an implied reference to the net. * * Otherwise NFS may not have ref on NFSD and therefore cannot safely @@ -147,21 +254,62 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, */ rcu_read_lock(); net = rcu_dereference(uuid->net); - if (!net || !nfs_to->nfsd_serv_try_get(net)) { + if (!net || !nfs_to->nfsd_net_try_get(net)) { rcu_read_unlock(); return ERR_PTR(-ENXIO); } rcu_read_unlock(); - /* We have an implied reference to net thanks to nfsd_serv_try_get */ + /* We have an implied reference to net thanks to nfsd_net_try_get */ localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, cred, nfs_fh, fmode); if (IS_ERR(localio)) nfs_to_nfsd_net_put(net); + else + nfs_uuid_add_file(uuid, nfl); return localio; } EXPORT_SYMBOL_GPL(nfs_open_local_fh); +void nfs_close_local_fh(struct nfs_file_localio *nfl) +{ + struct nfsd_file *ro_nf = NULL; + struct nfsd_file *rw_nf = NULL; + nfs_uuid_t *nfs_uuid; + + rcu_read_lock(); + nfs_uuid = rcu_dereference(nfl->nfs_uuid); + if (!nfs_uuid) { + /* regular (non-LOCALIO) NFS will hammer this */ + rcu_read_unlock(); + return; + } + + ro_nf = rcu_access_pointer(nfl->ro_file); + rw_nf = rcu_access_pointer(nfl->rw_file); + if (ro_nf || rw_nf) { + spin_lock(&nfs_uuid->lock); + if (ro_nf) + ro_nf = rcu_dereference_protected(xchg(&nfl->ro_file, NULL), 1); + if (rw_nf) + rw_nf = rcu_dereference_protected(xchg(&nfl->rw_file, NULL), 1); + + /* Remove nfl from nfs_uuid->files list */ + RCU_INIT_POINTER(nfl->nfs_uuid, NULL); + list_del_init(&nfl->list); + spin_unlock(&nfs_uuid->lock); + rcu_read_unlock(); + + if (ro_nf) + nfs_to_nfsd_file_put_local(ro_nf); + if (rw_nf) + nfs_to_nfsd_file_put_local(rw_nf); + return; + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(nfs_close_local_fh); + /* * The NFS LOCALIO code needs to call into NFSD using various symbols, * but cannot be statically linked, because that will make the NFS diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index dc5c9d8e8202..0e552d873eaa 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -39,6 +39,7 @@ #include <linux/fsnotify.h> #include <linux/seq_file.h> #include <linux/rhashtable.h> +#include <linux/nfslocalio.h> #include "vfs.h" #include "nfsd.h" @@ -391,7 +392,7 @@ nfsd_file_put(struct nfsd_file *nf) } /** - * nfsd_file_put_local - put nfsd_file reference and arm nfsd_serv_put in caller + * nfsd_file_put_local - put nfsd_file reference and arm nfsd_net_put in caller * @nf: nfsd_file of which to put the reference * * First save the associated net to return to caller, then put @@ -833,6 +834,14 @@ __nfsd_file_cache_purge(struct net *net) struct nfsd_file *nf; LIST_HEAD(dispose); +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + if (net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + nfs_localio_invalidate_clients(&nn->local_clients, + &nn->local_clients_lock); + } +#endif + rhltable_walk_enter(&nfsd_file_rhltable, &iter); do { rhashtable_walk_start(&iter); @@ -1222,10 +1231,9 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, * a file. The security implications of this should be carefully * considered before use. * - * The nfsd_file object returned by this API is reference-counted - * and garbage-collected. 
The object is retained for a few - * seconds after the final nfsd_file_put() in case the caller - * wants to re-use it. + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. The object is unhashed after the + * final nfsd_file_put(). * * Return values: * %nfs_ok - @pnf points to an nfsd_file with its reference @@ -1247,7 +1255,7 @@ nfsd_file_acquire_local(struct net *net, struct svc_cred *cred, __be32 beres; beres = nfsd_file_do_acquire(NULL, net, cred, client, - fhp, may_flags, NULL, pnf, true); + fhp, may_flags, NULL, pnf, false); put_cred(revert_creds(save_cred)); return beres; } diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index f441cb9f74d5..238647fa379e 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -25,10 +25,12 @@ #include "cache.h" static const struct nfsd_localio_operations nfsd_localio_ops = { - .nfsd_serv_try_get = nfsd_serv_try_get, - .nfsd_serv_put = nfsd_serv_put, + .nfsd_net_try_get = nfsd_net_try_get, + .nfsd_net_put = nfsd_net_put, .nfsd_open_local_fh = nfsd_open_local_fh, .nfsd_file_put_local = nfsd_file_put_local, + .nfsd_file_get = nfsd_file_get, + .nfsd_file_put = nfsd_file_put, .nfsd_file_file = nfsd_file_file, }; @@ -52,7 +54,7 @@ void nfsd_localio_ops_init(void) * avoid all the NFS overhead with reads, writes and commits. * * On successful return, returned nfsd_file will have its nf_net member - * set. Caller (NFS client) is responsible for calling nfsd_serv_put and + * set. Caller (NFS client) is responsible for calling nfsd_net_put and * nfsd_file_put (via nfs_to_nfsd_file_put_local). */ struct nfsd_file * @@ -114,6 +116,7 @@ static __be32 localio_proc_uuid_is_local(struct svc_rqst *rqstp) struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfs_uuid_is_local(&argp->uuid, &nn->local_clients, + &nn->local_clients_lock, net, rqstp->rq_client, THIS_MODULE); return rpc_success; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 4a07b8d0837b..3e2d0fde80a7 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -134,9 +134,10 @@ struct nfsd_net { struct svc_info nfsd_info; #define nfsd_serv nfsd_info.serv - struct percpu_ref nfsd_serv_ref; - struct completion nfsd_serv_confirm_done; - struct completion nfsd_serv_free_done; + + struct percpu_ref nfsd_net_ref; + struct completion nfsd_net_confirm_done; + struct completion nfsd_net_free_done; /* * clientid and stateid data for construction of net unique COPY @@ -213,6 +214,7 @@ struct nfsd_net { #if IS_ENABLED(CONFIG_NFS_LOCALIO) /* Local clients to be invalidated when net is shut down */ + spinlock_t local_clients_lock; struct list_head local_clients; #endif }; @@ -223,8 +225,8 @@ struct nfsd_net { extern bool nfsd_support_version(int vers); extern unsigned int nfsd_net_id; -bool nfsd_serv_try_get(struct net *net); -void nfsd_serv_put(struct net *net); +bool nfsd_net_try_get(struct net *net); +void nfsd_net_put(struct net *net); void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn); void nfsd_reset_write_verifier(struct nfsd_net *nn); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 95ea4393305b..ce2a71e4904c 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -2217,6 +2217,7 @@ static __net_init int nfsd_net_init(struct net *net) seqlock_init(&nn->writeverf_lock); nfsd_proc_stat_init(net); #if IS_ENABLED(CONFIG_NFS_LOCALIO) + spin_lock_init(&nn->local_clients_lock); INIT_LIST_HEAD(&nn->local_clients); #endif return 0; @@ -2234,14 +2235,15 @@ out_export_error: * nfsd_net_pre_exit - Disconnect localio clients from net namespace * @net: a 
network namespace that is about to be destroyed * - * This invalidated ->net pointers held by localio clients + * This invalidates ->net pointers held by localio clients * while they can still safely access nn->counter. */ static __net_exit void nfsd_net_pre_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfs_uuid_invalidate_clients(&nn->local_clients); + nfs_localio_invalidate_clients(&nn->local_clients, + &nn->local_clients_lock); } #endif diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 3f5104ed70bf..9b3d6cff0e1e 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -204,32 +204,32 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change return 0; } -bool nfsd_serv_try_get(struct net *net) __must_hold(rcu) +bool nfsd_net_try_get(struct net *net) __must_hold(rcu) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - return (nn && percpu_ref_tryget_live(&nn->nfsd_serv_ref)); + return (nn && percpu_ref_tryget_live(&nn->nfsd_net_ref)); } -void nfsd_serv_put(struct net *net) __must_hold(rcu) +void nfsd_net_put(struct net *net) __must_hold(rcu) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - percpu_ref_put(&nn->nfsd_serv_ref); + percpu_ref_put(&nn->nfsd_net_ref); } -static void nfsd_serv_done(struct percpu_ref *ref) +static void nfsd_net_done(struct percpu_ref *ref) { - struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref); + struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref); - complete(&nn->nfsd_serv_confirm_done); + complete(&nn->nfsd_net_confirm_done); } -static void nfsd_serv_free(struct percpu_ref *ref) +static void nfsd_net_free(struct percpu_ref *ref) { - struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref); + struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref); - complete(&nn->nfsd_serv_free_done); + complete(&nn->nfsd_net_free_done); } /* @@ -426,6 +426,10 @@ static void nfsd_shutdown_net(struct net *net) if (!nn->nfsd_net_up) return; + + percpu_ref_kill_and_confirm(&nn->nfsd_net_ref, nfsd_net_done); + wait_for_completion(&nn->nfsd_net_confirm_done); + nfsd_export_flush(net); nfs4_state_shutdown_net(net); nfsd_reply_cache_shutdown(nn); @@ -434,7 +438,10 @@ static void nfsd_shutdown_net(struct net *net) lockd_down(net); nn->lockd_up = false; } - percpu_ref_exit(&nn->nfsd_serv_ref); + + wait_for_completion(&nn->nfsd_net_free_done); + percpu_ref_exit(&nn->nfsd_net_ref); + nn->nfsd_net_up = false; nfsd_shutdown_generic(); } @@ -516,11 +523,6 @@ void nfsd_destroy_serv(struct net *net) lockdep_assert_held(&nfsd_mutex); - percpu_ref_kill_and_confirm(&nn->nfsd_serv_ref, nfsd_serv_done); - wait_for_completion(&nn->nfsd_serv_confirm_done); - wait_for_completion(&nn->nfsd_serv_free_done); - /* percpu_ref_exit is called in nfsd_shutdown_net */ - spin_lock(&nfsd_notifier_lock); nn->nfsd_serv = NULL; spin_unlock(&nfsd_notifier_lock); @@ -621,12 +623,12 @@ int nfsd_create_serv(struct net *net) if (nn->nfsd_serv) return 0; - error = percpu_ref_init(&nn->nfsd_serv_ref, nfsd_serv_free, + error = percpu_ref_init(&nn->nfsd_net_ref, nfsd_net_free, 0, GFP_KERNEL); if (error) return error; - init_completion(&nn->nfsd_serv_free_done); - init_completion(&nn->nfsd_serv_confirm_done); + init_completion(&nn->nfsd_net_free_done); + init_completion(&nn->nfsd_net_confirm_done); if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 6004dfdfdf0f..c4cdaf5fa7ed 
100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -20,7 +20,7 @@ static int dir_notify_enable __read_mostly = 1; #ifdef CONFIG_SYSCTL -static struct ctl_table dnotify_sysctls[] = { +static const struct ctl_table dnotify_sysctls[] = { { .procname = "dir-notify-enable", .data = &dir_notify_enable, diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6ff94e312232..ba3e2d09eb44 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -58,7 +58,7 @@ static int fanotify_max_queued_events __read_mostly; static long ft_zero = 0; static long ft_int_max = INT_MAX; -static struct ctl_table fanotify_table[] = { +static const struct ctl_table fanotify_table[] = { { .procname = "max_user_groups", .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS], diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index e0c48956608a..b372fb2c56bd 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -58,7 +58,7 @@ struct kmem_cache *inotify_inode_mark_cachep __ro_after_init; static long it_zero = 0; static long it_int_max = INT_MAX; -static struct ctl_table inotify_table[] = { +static const struct ctl_table inotify_table[] = { { .procname = "max_user_instances", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES], diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 8d789b017fa9..af94e3737470 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -787,7 +787,8 @@ pack_runs: if (err) goto out; - attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id); + attr = mi_find_attr(ni, mi, NULL, type, name, name_len, + &le->id); if (!attr) { err = -EINVAL; goto bad_inode; @@ -1181,7 +1182,7 @@ repack: goto out; } - attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, &le->id); + attr = mi_find_attr(ni, mi, NULL, ATTR_DATA, NULL, 0, &le->id); if (!attr) { err = -EINVAL; goto out; @@ -1406,7 +1407,7 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, */ if (!attr->non_res) { if (vbo[1] + bytes_per_off > le32_to_cpu(attr->res.data_size)) { - ntfs_inode_err(&ni->vfs_inode, "is corrupted"); + _ntfs_bad_inode(&ni->vfs_inode); return -EINVAL; } addr = resident_data(attr); @@ -1796,7 +1797,7 @@ repack: goto out; } - attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, + attr = mi_find_attr(ni, mi, NULL, ATTR_DATA, NULL, 0, &le->id); if (!attr) { err = -EINVAL; @@ -2041,8 +2042,8 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) } /* Look for required attribute. 
*/ - attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, - 0, &le->id); + attr = mi_find_attr(ni, mi, NULL, ATTR_DATA, + NULL, 0, &le->id); if (!attr) { err = -EINVAL; goto out; @@ -2587,7 +2588,7 @@ int attr_force_nonresident(struct ntfs_inode *ni) attr = ni_find_attr(ni, NULL, &le, ATTR_DATA, NULL, 0, NULL, &mi); if (!attr) { - ntfs_bad_inode(&ni->vfs_inode, "no data attribute"); + _ntfs_bad_inode(&ni->vfs_inode); return -ENOENT; } diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index fc6a8aa29e3a..b6da80c69ca6 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -512,7 +512,7 @@ out: ctx->pos = pos; } else if (err < 0) { if (err == -EINVAL) - ntfs_inode_err(dir, "directory corrupted"); + _ntfs_bad_inode(dir); ctx->pos = eod; } diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 8b39d0ce5f28..5df6a0b5add9 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -75,7 +75,7 @@ struct ATTR_STD_INFO *ni_std(struct ntfs_inode *ni) { const struct ATTRIB *attr; - attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL); + attr = mi_find_attr(ni, &ni->mi, NULL, ATTR_STD, NULL, 0, NULL); return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO)) : NULL; } @@ -89,7 +89,7 @@ struct ATTR_STD_INFO5 *ni_std5(struct ntfs_inode *ni) { const struct ATTRIB *attr; - attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL); + attr = mi_find_attr(ni, &ni->mi, NULL, ATTR_STD, NULL, 0, NULL); return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO5)) : NULL; @@ -148,8 +148,10 @@ int ni_load_mi_ex(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi) goto out; err = mi_get(ni->mi.sbi, rno, &r); - if (err) + if (err) { + _ntfs_bad_inode(&ni->vfs_inode); return err; + } ni_add_mi(ni, r); @@ -201,7 +203,8 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr, *mi = &ni->mi; /* Look for required attribute in primary record. */ - return mi_find_attr(&ni->mi, attr, type, name, name_len, NULL); + return mi_find_attr(ni, &ni->mi, attr, type, name, name_len, + NULL); } /* First look for list entry of required type. */ @@ -217,7 +220,7 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr, return NULL; /* Look for required attribute. */ - attr = mi_find_attr(m, NULL, type, name, name_len, &le->id); + attr = mi_find_attr(ni, m, NULL, type, name, name_len, &le->id); if (!attr) goto out; @@ -238,8 +241,7 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr, return attr; out: - ntfs_inode_err(&ni->vfs_inode, "failed to parse mft record"); - ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); + _ntfs_bad_inode(&ni->vfs_inode); return NULL; } @@ -259,7 +261,7 @@ struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr, if (mi) *mi = &ni->mi; /* Enum attributes in primary record. */ - return mi_enum_attr(&ni->mi, attr); + return mi_enum_attr(ni, &ni->mi, attr); } /* Get next list entry. */ @@ -275,7 +277,7 @@ struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr, *mi = mi2; /* Find attribute in loaded record. 
*/ - return rec_find_attr_le(mi2, le2); + return rec_find_attr_le(ni, mi2, le2); } /* @@ -293,7 +295,8 @@ struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, if (!ni->attr_list.size) { if (pmi) *pmi = &ni->mi; - return mi_find_attr(&ni->mi, NULL, type, name, name_len, NULL); + return mi_find_attr(ni, &ni->mi, NULL, type, name, name_len, + NULL); } le = al_find_ex(ni, NULL, type, name, name_len, NULL); @@ -319,7 +322,7 @@ struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, if (pmi) *pmi = mi; - attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id); + attr = mi_find_attr(ni, mi, NULL, type, name, name_len, &le->id); if (!attr) return NULL; @@ -330,6 +333,7 @@ struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, vcn <= le64_to_cpu(attr->nres.evcn)) return attr; + _ntfs_bad_inode(&ni->vfs_inode); return NULL; } @@ -398,7 +402,8 @@ int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, int diff; if (base_only || type == ATTR_LIST || !ni->attr_list.size) { - attr = mi_find_attr(&ni->mi, NULL, type, name, name_len, id); + attr = mi_find_attr(ni, &ni->mi, NULL, type, name, name_len, + id); if (!attr) return -ENOENT; @@ -437,7 +442,7 @@ next_le2: al_remove_le(ni, le); - attr = mi_find_attr(mi, NULL, type, name, name_len, id); + attr = mi_find_attr(ni, mi, NULL, type, name, name_len, id); if (!attr) return -ENOENT; @@ -485,7 +490,7 @@ ni_ins_new_attr(struct ntfs_inode *ni, struct mft_inode *mi, name = le->name; } - attr = mi_insert_attr(mi, type, name, name_len, asize, name_off); + attr = mi_insert_attr(ni, mi, type, name, name_len, asize, name_off); if (!attr) { if (le_added) al_remove_le(ni, le); @@ -673,7 +678,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) if (err) return err; - attr_list = mi_find_attr(&ni->mi, NULL, ATTR_LIST, NULL, 0, NULL); + attr_list = mi_find_attr(ni, &ni->mi, NULL, ATTR_LIST, NULL, 0, NULL); if (!attr_list) return 0; @@ -695,7 +700,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) if (!mi) return 0; - attr = mi_find_attr(mi, NULL, le->type, le_name(le), + attr = mi_find_attr(ni, mi, NULL, le->type, le_name(le), le->name_len, &le->id); if (!attr) return 0; @@ -731,7 +736,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) goto out; } - attr = mi_find_attr(mi, NULL, le->type, le_name(le), + attr = mi_find_attr(ni, mi, NULL, le->type, le_name(le), le->name_len, &le->id); if (!attr) { /* Should never happened, 'cause already checked. */ @@ -740,7 +745,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) asize = le32_to_cpu(attr->size); /* Insert into primary record. 
*/ - attr_ins = mi_insert_attr(&ni->mi, le->type, le_name(le), + attr_ins = mi_insert_attr(ni, &ni->mi, le->type, le_name(le), le->name_len, asize, le16_to_cpu(attr->name_off)); if (!attr_ins) { @@ -768,7 +773,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) if (!mi) continue; - attr = mi_find_attr(mi, NULL, le->type, le_name(le), + attr = mi_find_attr(ni, mi, NULL, le->type, le_name(le), le->name_len, &le->id); if (!attr) continue; @@ -831,7 +836,7 @@ int ni_create_attr_list(struct ntfs_inode *ni) free_b = 0; attr = NULL; - for (; (attr = mi_enum_attr(&ni->mi, attr)); le = Add2Ptr(le, sz)) { + for (; (attr = mi_enum_attr(ni, &ni->mi, attr)); le = Add2Ptr(le, sz)) { sz = le_size(attr->name_len); le->type = attr->type; le->size = cpu_to_le16(sz); @@ -886,7 +891,7 @@ int ni_create_attr_list(struct ntfs_inode *ni) u32 asize = le32_to_cpu(b->size); u16 name_off = le16_to_cpu(b->name_off); - attr = mi_insert_attr(mi, b->type, Add2Ptr(b, name_off), + attr = mi_insert_attr(ni, mi, b->type, Add2Ptr(b, name_off), b->name_len, asize, name_off); if (!attr) goto out; @@ -909,7 +914,7 @@ int ni_create_attr_list(struct ntfs_inode *ni) goto out; } - attr = mi_insert_attr(&ni->mi, ATTR_LIST, NULL, 0, + attr = mi_insert_attr(ni, &ni->mi, ATTR_LIST, NULL, 0, lsize + SIZEOF_RESIDENT, SIZEOF_RESIDENT); if (!attr) goto out; @@ -993,13 +998,13 @@ static int ni_ins_attr_ext(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le, mi = rb_entry(node, struct mft_inode, node); if (is_mft_data && - (mi_enum_attr(mi, NULL) || + (mi_enum_attr(ni, mi, NULL) || vbo <= ((u64)mi->rno << sbi->record_bits))) { /* We can't accept this record 'cause MFT's bootstrapping. */ continue; } if (is_mft && - mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, NULL)) { + mi_find_attr(ni, mi, NULL, ATTR_DATA, NULL, 0, NULL)) { /* * This child record already has a ATTR_DATA. * So it can't accept any other records. @@ -1008,7 +1013,7 @@ static int ni_ins_attr_ext(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le, } if ((type != ATTR_NAME || name_len) && - mi_find_attr(mi, NULL, type, name, name_len, NULL)) { + mi_find_attr(ni, mi, NULL, type, name, name_len, NULL)) { /* Only indexed attributes can share same record. */ continue; } @@ -1157,7 +1162,7 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, /* Estimate the result of moving all possible attributes away. */ attr = NULL; - while ((attr = mi_enum_attr(&ni->mi, attr))) { + while ((attr = mi_enum_attr(ni, &ni->mi, attr))) { if (attr->type == ATTR_STD) continue; if (attr->type == ATTR_LIST) @@ -1175,7 +1180,7 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, attr = NULL; for (;;) { - attr = mi_enum_attr(&ni->mi, attr); + attr = mi_enum_attr(ni, &ni->mi, attr); if (!attr) { /* We should never be here 'cause we have already check this case. */ err = -EINVAL; @@ -1259,7 +1264,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni) for (node = rb_first(&ni->mi_tree); node; node = rb_next(node)) { mi = rb_entry(node, struct mft_inode, node); - attr = mi_enum_attr(mi, NULL); + attr = mi_enum_attr(ni, mi, NULL); if (!attr) { mft_min = mi->rno; @@ -1280,7 +1285,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni) ni_remove_mi(ni, mi_new); } - attr = mi_find_attr(&ni->mi, NULL, ATTR_DATA, NULL, 0, NULL); + attr = mi_find_attr(ni, &ni->mi, NULL, ATTR_DATA, NULL, 0, NULL); if (!attr) { err = -EINVAL; goto out; @@ -1397,7 +1402,7 @@ int ni_expand_list(struct ntfs_inode *ni) continue; /* Find attribute in primary record. 
*/ - attr = rec_find_attr_le(&ni->mi, le); + attr = rec_find_attr_le(ni, &ni->mi, le); if (!attr) { err = -EINVAL; goto out; @@ -1604,8 +1609,8 @@ int ni_delete_all(struct ntfs_inode *ni) roff = le16_to_cpu(attr->nres.run_off); if (roff > asize) { - _ntfs_bad_inode(&ni->vfs_inode); - return -EINVAL; + /* ni_enum_attr_ex checks this case. */ + continue; } /* run==1 means unpack and deallocate. */ @@ -2726,9 +2731,10 @@ int ni_write_frame(struct ntfs_inode *ni, struct page **pages, { int err; struct ntfs_sb_info *sbi = ni->mi.sbi; + struct folio *folio = page_folio(pages[0]); u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits; u32 frame_size = sbi->cluster_size << NTFS_LZNT_CUNIT; - u64 frame_vbo = (u64)pages[0]->index << PAGE_SHIFT; + u64 frame_vbo = folio_pos(folio); CLST frame = frame_vbo >> frame_bits; char *frame_ondisk = NULL; struct page **pages_disk = NULL; @@ -3343,7 +3349,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) if (!mi->dirty) continue; - is_empty = !mi_enum_attr(mi, NULL); + is_empty = !mi_enum_attr(ni, mi, NULL); if (is_empty) clear_rec_inuse(mi->mrec); diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 03471bc9371c..938d351ebac7 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -908,7 +908,11 @@ void ntfs_bad_inode(struct inode *inode, const char *hint) ntfs_inode_err(inode, "%s", hint); make_bad_inode(inode); - ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + /* Avoid recursion if bad inode is $Volume. */ + if (inode->i_ino != MFT_REC_VOL && + !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING)) { + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + } } /* diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 9089c58a005c..7eb9fae22f8d 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1094,8 +1094,7 @@ int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, ok: if (!index_buf_check(ib, bytes, &vbn)) { - ntfs_inode_err(&ni->vfs_inode, "directory corrupted"); - ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); + _ntfs_bad_inode(&ni->vfs_inode); err = -EINVAL; goto out; } @@ -1117,8 +1116,7 @@ ok: out: if (err == -E_NTFS_CORRUPT) { - ntfs_inode_err(&ni->vfs_inode, "directory corrupted"); - ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); + _ntfs_bad_inode(&ni->vfs_inode); err = -EINVAL; } diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index be04d2845bb7..a1e11228dafd 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -410,6 +410,9 @@ end_enum: if (!std5) goto out; + if (is_bad_inode(inode)) + goto out; + if (!is_match && name) { err = -ENOENT; goto out; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index cd8e8374bb5a..382820464dee 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -745,23 +745,24 @@ int mi_get(struct ntfs_sb_info *sbi, CLST rno, struct mft_inode **mi); void mi_put(struct mft_inode *mi); int mi_init(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno); int mi_read(struct mft_inode *mi, bool is_mft); -struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr); -// TODO: id? 
-struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr, - enum ATTR_TYPE type, const __le16 *name, - u8 name_len, const __le16 *id); -static inline struct ATTRIB *rec_find_attr_le(struct mft_inode *rec, +struct ATTRIB *mi_enum_attr(struct ntfs_inode *ni, struct mft_inode *mi, + struct ATTRIB *attr); +struct ATTRIB *mi_find_attr(struct ntfs_inode *ni, struct mft_inode *mi, + struct ATTRIB *attr, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, const __le16 *id); +static inline struct ATTRIB *rec_find_attr_le(struct ntfs_inode *ni, + struct mft_inode *rec, struct ATTR_LIST_ENTRY *le) { - return mi_find_attr(rec, NULL, le->type, le_name(le), le->name_len, + return mi_find_attr(ni, rec, NULL, le->type, le_name(le), le->name_len, &le->id); } int mi_write(struct mft_inode *mi, int wait); int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno, __le16 flags, bool is_mft); -struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, - const __le16 *name, u8 name_len, u32 asize, - u16 name_off); +struct ATTRIB *mi_insert_attr(struct ntfs_inode *ni, struct mft_inode *mi, + enum ATTR_TYPE type, const __le16 *name, + u8 name_len, u32 asize, u16 name_off); bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, struct ATTRIB *attr); diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 61d53d39f3b9..714c7ecedca8 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -31,7 +31,7 @@ static inline int compare_attr(const struct ATTRIB *left, enum ATTR_TYPE type, * * Return: Unused attribute id that is less than mrec->next_attr_id. */ -static __le16 mi_new_attt_id(struct mft_inode *mi) +static __le16 mi_new_attt_id(struct ntfs_inode *ni, struct mft_inode *mi) { u16 free_id, max_id, t16; struct MFT_REC *rec = mi->mrec; @@ -52,7 +52,7 @@ static __le16 mi_new_attt_id(struct mft_inode *mi) attr = NULL; for (;;) { - attr = mi_enum_attr(mi, attr); + attr = mi_enum_attr(ni, mi, attr); if (!attr) { rec->next_attr_id = cpu_to_le16(max_id + 1); mi->dirty = true; @@ -195,7 +195,8 @@ out: * NOTE: mi->mrec - memory of size sbi->record_size * here we sure that mi->mrec->total == sbi->record_size (see mi_read) */ -struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) +struct ATTRIB *mi_enum_attr(struct ntfs_inode *ni, struct mft_inode *mi, + struct ATTRIB *attr) { const struct MFT_REC *rec = mi->mrec; u32 used = le32_to_cpu(rec->used); @@ -209,11 +210,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) off = le16_to_cpu(rec->attr_off); if (used > total) - return NULL; + goto out; if (off >= used || off < MFTRECORD_FIXUP_OFFSET_1 || !IS_ALIGNED(off, 8)) { - return NULL; + goto out; } /* Skip non-resident records. */ @@ -243,7 +244,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) */ if (off + 8 > used) { static_assert(ALIGN(sizeof(enum ATTR_TYPE), 8) == 8); - return NULL; + goto out; } if (attr->type == ATTR_END) { @@ -254,112 +255,116 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) /* 0x100 is last known attribute for now. */ t32 = le32_to_cpu(attr->type); if (!t32 || (t32 & 0xf) || (t32 > 0x100)) - return NULL; + goto out; /* attributes in record must be ordered by type */ if (t32 < prev_type) - return NULL; + goto out; asize = le32_to_cpu(attr->size); if (!IS_ALIGNED(asize, 8)) - return NULL; + goto out; /* Check overflow and boundary. */ if (off + asize < off || off + asize > used) - return NULL; + goto out; /* Can we use the field attr->non_res. 
*/ if (off + 9 > used) - return NULL; + goto out; /* Check size of attribute. */ if (!attr->non_res) { /* Check resident fields. */ if (asize < SIZEOF_RESIDENT) - return NULL; + goto out; t16 = le16_to_cpu(attr->res.data_off); if (t16 > asize) - return NULL; + goto out; if (le32_to_cpu(attr->res.data_size) > asize - t16) - return NULL; + goto out; t32 = sizeof(short) * attr->name_len; if (t32 && le16_to_cpu(attr->name_off) + t32 > t16) - return NULL; + goto out; return attr; } /* Check nonresident fields. */ if (attr->non_res != 1) - return NULL; + goto out; /* Can we use memory including attr->nres.valid_size? */ if (asize < SIZEOF_NONRESIDENT) - return NULL; + goto out; t16 = le16_to_cpu(attr->nres.run_off); if (t16 > asize) - return NULL; + goto out; t32 = sizeof(short) * attr->name_len; if (t32 && le16_to_cpu(attr->name_off) + t32 > t16) - return NULL; + goto out; /* Check start/end vcn. */ if (le64_to_cpu(attr->nres.svcn) > le64_to_cpu(attr->nres.evcn) + 1) - return NULL; + goto out; data_size = le64_to_cpu(attr->nres.data_size); if (le64_to_cpu(attr->nres.valid_size) > data_size) - return NULL; + goto out; alloc_size = le64_to_cpu(attr->nres.alloc_size); if (data_size > alloc_size) - return NULL; + goto out; t32 = mi->sbi->cluster_mask; if (alloc_size & t32) - return NULL; + goto out; if (!attr->nres.svcn && is_attr_ext(attr)) { /* First segment of sparse/compressed attribute */ /* Can we use memory including attr->nres.total_size? */ if (asize < SIZEOF_NONRESIDENT_EX) - return NULL; + goto out; tot_size = le64_to_cpu(attr->nres.total_size); if (tot_size & t32) - return NULL; + goto out; if (tot_size > alloc_size) - return NULL; + goto out; } else { if (attr->nres.c_unit) - return NULL; + goto out; if (alloc_size > mi->sbi->volume.size) - return NULL; + goto out; } return attr; + +out: + _ntfs_bad_inode(&ni->vfs_inode); + return NULL; } /* * mi_find_attr - Find the attribute by type and name and id. */ -struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr, - enum ATTR_TYPE type, const __le16 *name, - u8 name_len, const __le16 *id) +struct ATTRIB *mi_find_attr(struct ntfs_inode *ni, struct mft_inode *mi, + struct ATTRIB *attr, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, const __le16 *id) { u32 type_in = le32_to_cpu(type); u32 atype; next_attr: - attr = mi_enum_attr(mi, attr); + attr = mi_enum_attr(ni, mi, attr); if (!attr) return NULL; @@ -467,9 +472,9 @@ int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno, * * Return: Not full constructed attribute or NULL if not possible to create. */ -struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, - const __le16 *name, u8 name_len, u32 asize, - u16 name_off) +struct ATTRIB *mi_insert_attr(struct ntfs_inode *ni, struct mft_inode *mi, + enum ATTR_TYPE type, const __le16 *name, + u8 name_len, u32 asize, u16 name_off) { size_t tail; struct ATTRIB *attr; @@ -488,7 +493,7 @@ struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, * at which we should insert it. 
*/ attr = NULL; - while ((attr = mi_enum_attr(mi, attr))) { + while ((attr = mi_enum_attr(ni, mi, attr))) { int diff = compare_attr(attr, type, name, name_len, upcase); if (diff < 0) @@ -508,7 +513,7 @@ struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, tail = used - PtrOffset(rec, attr); } - id = mi_new_attt_id(mi); + id = mi_new_attt_id(ni, mi); memmove(Add2Ptr(attr, asize), attr, tail); memset(attr, 0, asize); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index a9b8688aaf30..1873bbbb7e5b 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -32,7 +32,8 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry) } -static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) +static int ocfs2_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; int ret = 0; /* if all else fails, just return false */ @@ -44,8 +45,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) inode = d_inode(dentry); osb = OCFS2_SB(dentry->d_sb); - trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len, - dentry->d_name.name); + trace_ocfs2_dentry_revalidate(dentry, name->len, name->name); /* For a negative dentry - * check the generation number of the parent and compare with the @@ -53,12 +53,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) */ if (inode == NULL) { unsigned long gen = (unsigned long) dentry->d_fsdata; - unsigned long pgen; - spin_lock(&dentry->d_lock); - pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen; - spin_unlock(&dentry->d_lock); - trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len, - dentry->d_name.name, + unsigned long pgen = OCFS2_I(dir)->ip_dir_lock_gen; + trace_ocfs2_dentry_revalidate_negative(name->len, name->name, pgen, gen); if (gen != pgen) goto bail; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 20aa37b67cfb..ddd761cf44c8 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -650,7 +650,7 @@ error: * and easier to preserve the name. */ -static struct ctl_table ocfs2_nm_table[] = { +static const struct ctl_table ocfs2_nm_table[] = { { .procname = "hb_ctl_path", .data = ocfs2_hb_ctl_path, diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index 395a00ed8ac7..a19d1ad705db 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -13,10 +13,9 @@ #include "orangefs-kernel.h" /* Returns 1 if dentry can still be trusted, else 0. 
*/ -static int orangefs_revalidate_lookup(struct dentry *dentry) +static int orangefs_revalidate_lookup(struct inode *parent_inode, const struct qstr *name, + struct dentry *dentry) { - struct dentry *parent_dentry = dget_parent(dentry); - struct inode *parent_inode = parent_dentry->d_inode; struct orangefs_inode_s *parent = ORANGEFS_I(parent_inode); struct inode *inode = dentry->d_inode; struct orangefs_kernel_op_s *new_op; @@ -26,14 +25,14 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__); new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); - if (!new_op) { - ret = -ENOMEM; - goto out_put_parent; - } + if (!new_op) + return -ENOMEM; new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; new_op->upcall.req.lookup.parent_refn = parent->refn; - strscpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name); + /* op_alloc() leaves ->upcall zeroed */ + memcpy(new_op->upcall.req.lookup.d_name, name->name, + min(name->len, ORANGEFS_NAME_MAX - 1)); gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d interrupt flag [%d]\n", @@ -78,8 +77,6 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) ret = 1; out_release_op: op_release(new_op); -out_put_parent: - dput(parent_dentry); return ret; out_drop: gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d revalidate failed\n", @@ -92,7 +89,8 @@ out_drop: * * Should return 1 if dentry can still be trusted, else 0. */ -static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) +static int orangefs_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { int ret; unsigned long time = (unsigned long) dentry->d_fsdata; @@ -114,7 +112,7 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) * If this passes, the positive dentry still exists or the negative * dentry still does not exist. */ - if (!orangefs_revalidate_lookup(dentry)) + if (!orangefs_revalidate_lookup(dir, name, dentry)) return 0; /* We do not need to continue with negative dentries. */ diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 1b508f543384..f52073022fae 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -206,8 +206,8 @@ static void orangefs_kernel_debug_init(void) pr_info("%s: overflow 1!\n", __func__); } - debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE, 0444, debug_dir, k_buffer, - &kernel_debug_fops); + debugfs_create_file_aux_num(ORANGEFS_KMOD_DEBUG_FILE, 0444, debug_dir, k_buffer, + 0, &kernel_debug_fops); } @@ -306,11 +306,10 @@ static void orangefs_client_debug_init(void) pr_info("%s: overflow! 2\n", __func__); } - client_debug_dentry = debugfs_create_file(ORANGEFS_CLIENT_DEBUG_FILE, - 0444, - debug_dir, - c_buffer, - &kernel_debug_fops); + client_debug_dentry = debugfs_create_file_aux_num( + ORANGEFS_CLIENT_DEBUG_FILE, + 0444, debug_dir, c_buffer, 1, + &kernel_debug_fops); } /* open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.*/ @@ -393,9 +392,9 @@ static ssize_t orangefs_debug_write(struct file *file, * Thwart users who try to jamb a ridiculous number * of bytes into the debug file... 
*/ - if (count > ORANGEFS_MAX_DEBUG_STRING_LEN + 1) { + if (count > ORANGEFS_MAX_DEBUG_STRING_LEN) { silly = count; - count = ORANGEFS_MAX_DEBUG_STRING_LEN + 1; + count = ORANGEFS_MAX_DEBUG_STRING_LEN; } buf = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); @@ -418,8 +417,7 @@ static ssize_t orangefs_debug_write(struct file *file, * A service operation is required to set a new client-side * debug mask. */ - if (!strcmp(file->f_path.dentry->d_name.name, - ORANGEFS_KMOD_DEBUG_FILE)) { + if (!debugfs_get_aux_num(file)) { // kernel-debug debug_string_to_mask(buf, &orangefs_gossip_debug_mask, 0); debug_mask_to_string(&orangefs_gossip_debug_mask, 0); debug_string = kernel_debug_string; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index fe511192f83c..86ae6f6da36b 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -91,7 +91,24 @@ static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) ret = d->d_op->d_weak_revalidate(d, flags); } else if (d->d_flags & DCACHE_OP_REVALIDATE) { - ret = d->d_op->d_revalidate(d, flags); + struct dentry *parent; + struct inode *dir; + struct name_snapshot n; + + if (flags & LOOKUP_RCU) { + parent = READ_ONCE(d->d_parent); + dir = d_inode_rcu(parent); + if (!dir) + return -ECHILD; + } else { + parent = dget_parent(d); + dir = d_inode(parent); + } + take_dentry_name_snapshot(&n, d); + ret = d->d_op->d_revalidate(dir, &n.name, d, flags); + release_dentry_name_snapshot(&n); + if (!(flags & LOOKUP_RCU)) + dput(parent); if (!ret) { if (!(flags & LOOKUP_RCU)) d_invalidate(d); @@ -127,7 +144,8 @@ static int ovl_dentry_revalidate_common(struct dentry *dentry, return ret; } -static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +static int ovl_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { return ovl_dentry_revalidate_common(dentry, flags, false); } diff --git a/fs/pipe.c b/fs/pipe.c index 82fede0f2111..94b59045ab44 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1478,7 +1478,7 @@ static int proc_dopipe_max_size(const struct ctl_table *table, int write, do_proc_dopipe_max_size_conv, NULL); } -static struct ctl_table fs_pipe_sysctls[] = { +static const struct ctl_table fs_pipe_sysctls[] = { { .procname = "pipe-max-size", .data = &pipe_max_size, diff --git a/fs/proc/base.c b/fs/proc/base.c index a50b222a5917..cd89e956c322 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2058,7 +2058,8 @@ void pid_update_inode(struct task_struct *task, struct inode *inode) * performed a setuid(), etc. 
* */ -static int pid_revalidate(struct dentry *dentry, unsigned int flags) +static int pid_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; @@ -2191,7 +2192,8 @@ static int dname_to_vma_addr(struct dentry *dentry, return 0; } -static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) +static int map_files_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; bool exact_vma_exists = false; diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 24baf23e864f..37aa778d1af7 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -140,7 +140,8 @@ static void tid_fd_update_inode(struct task_struct *task, struct inode *inode, security_task_to_inode(task, inode); } -static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) +static int tid_fd_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct task_struct *task; struct inode *inode; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index dbe82cf23ee4..8ec90826a49e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -216,7 +216,8 @@ void proc_free_inum(unsigned int inum) ida_free(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); } -static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags) +static int proc_misc_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; @@ -343,7 +344,8 @@ static const struct file_operations proc_dir_operations = { .iterate_shared = proc_readdir, }; -static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags) +static int proc_net_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { return 0; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 27a283d85a6e..cc9d74a06ff0 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -884,7 +884,8 @@ static const struct inode_operations proc_sys_dir_operations = { .getattr = proc_sys_getattr, }; -static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags) +static int proc_sys_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index f9578918cfb2..825c5c2e0962 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2926,7 +2926,7 @@ static int do_proc_dqstats(const struct ctl_table *table, int write, return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } -static struct ctl_table fs_dqstats_table[] = { +static const struct ctl_table fs_dqstats_table[] = { { .procname = "lookups", .data = &dqstats.stat[DQST_LOOKUPS], diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index 1822493dd084..d1e95632ac54 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -737,7 +737,8 @@ again: } static int -cifs_d_revalidate(struct dentry *direntry, unsigned int flags) +cifs_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *direntry, unsigned int flags) { struct inode *inode; int rc; diff --git a/fs/sysctls.c b/fs/sysctls.c index 8dbde9a802fa..ad429dffeb4b 100644 --- a/fs/sysctls.c +++ b/fs/sysctls.c @@ -7,7 +7,7 @@ #include <linux/init.h> #include <linux/sysctl.h> -static struct ctl_table fs_shared_sysctls[] = { +static const struct ctl_table 
fs_shared_sysctls[] = { { .procname = "overflowuid", .data = &fs_overflowuid, diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 785408861c01..6931308876c4 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -817,7 +817,7 @@ EXPORT_SYMBOL_GPL(sysfs_emit_at); * Returns number of bytes written to @buf. */ ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { memcpy(buf, attr->private + off, count); diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index cfc614c638da..53214499e384 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -457,7 +457,8 @@ static void tracefs_d_release(struct dentry *dentry) eventfs_d_release(dentry); } -static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) +static int tracefs_d_revalidate(struct inode *inode, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct eventfs_inode *ei = dentry->d_fsdata; diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 5cc69beaa62e..b01f382ce8db 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -863,7 +863,6 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) out: vfree(buf); - return; } void ubifs_dump_znode(const struct ubifs_info *c, @@ -946,16 +945,20 @@ void ubifs_dump_tnc(struct ubifs_info *c) pr_err("\n"); pr_err("(pid %d) start dumping TNC tree\n", current->pid); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); - level = znode->level; - pr_err("== Level %d ==\n", level); - while (znode) { - if (level != znode->level) { - level = znode->level; - pr_err("== Level %d ==\n", level); + if (c->zroot.znode) { + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); + level = znode->level; + pr_err("== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; + pr_err("== Level %d ==\n", level); + } + ubifs_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); } - ubifs_dump_znode(c, znode); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); + } else { + pr_err("empty TNC tree in memory\n"); } pr_err("(pid %d) finish dumping TNC tree\n", current->pid); } diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index aa8837e6247c..f2cb214581fd 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -1932,7 +1932,6 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); out: vfree(buf); - return; } /** diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 7c0bd0b55f88..97c4d71115d8 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -36,7 +36,7 @@ static int sysctl_unprivileged_userfaultfd __read_mostly; #ifdef CONFIG_SYSCTL -static struct ctl_table vm_userfaultfd_table[] = { +static const struct ctl_table vm_userfaultfd_table[] = { { .procname = "unprivileged_userfaultfd", .data = &sysctl_unprivileged_userfaultfd, diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c index 5f1a14d5b927..a859ac9b74ba 100644 --- a/fs/vboxsf/dir.c +++ b/fs/vboxsf/dir.c @@ -192,7 +192,8 @@ const struct file_operations vboxsf_dir_fops = { * This is called during name resolution/lookup to check if the @dentry in * the cache is still valid. the job is handled by vboxsf_inode_revalidate. 
*/ -static int vboxsf_dentry_revalidate(struct dentry *dentry, unsigned int flags) +static int vboxsf_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; diff --git a/fs/verity/init.c b/fs/verity/init.c index f440f0e61e3e..6e8d33b50240 100644 --- a/fs/verity/init.c +++ b/fs/verity/init.c @@ -10,7 +10,7 @@ #include <linux/ratelimit.h> #ifdef CONFIG_SYSCTL -static struct ctl_table fsverity_sysctl_table[] = { +static const struct ctl_table fsverity_sysctl_table[] = { #ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES { .procname = "require_signatures", diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index c84df23b494d..751dc74a3067 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -66,7 +66,7 @@ xfs_deprecated_dointvec_minmax( return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); } -static struct ctl_table xfs_table[] = { +static const struct ctl_table xfs_table[] = { { .procname = "irix_sgid_inherit", .data = &xfs_params.sgid_inherit.val, diff --git a/include/cxl/event.h b/include/cxl/event.h index 0bea1afbd747..04edd44bd26f 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -18,7 +18,8 @@ struct cxl_event_record_hdr { __le16 related_handle; __le64 timestamp; u8 maint_op_class; - u8 reserved[15]; + u8 maint_op_sub_class; + u8 reserved[14]; } __packed; struct cxl_event_media_hdr { @@ -44,19 +45,22 @@ struct cxl_event_generic { /* * General Media Event Record - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + * CXL rev 3.1 Section 8.2.9.2.1.1; Table 8-45 */ #define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 struct cxl_event_gen_media { struct cxl_event_media_hdr media_hdr; u8 device[3]; u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; - u8 reserved[46]; + u8 cme_threshold_ev_flags; + u8 cme_count[3]; + u8 sub_type; + u8 reserved[41]; } __packed; /* * DRAM Event Record - DER - * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 + * CXL rev 3.1 section 8.2.9.2.1.2; Table 8-46 */ #define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 struct cxl_event_dram { @@ -67,12 +71,17 @@ struct cxl_event_dram { u8 row[3]; u8 column[2]; u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; - u8 reserved[0x17]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 sub_channel; + u8 cme_threshold_ev_flags; + u8 cvme_count[3]; + u8 sub_type; + u8 reserved; } __packed; /* * Get Health Info Record - * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 + * CXL rev 3.1 section 8.2.9.9.3.1; Table 8-133 */ struct cxl_get_health_info { u8 health_status; @@ -87,13 +96,16 @@ struct cxl_get_health_info { /* * Memory Module Event Record - * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + * CXL rev 3.1 section 8.2.9.2.1.3; Table 8-47 */ struct cxl_event_mem_module { struct cxl_event_record_hdr hdr; u8 event_type; struct cxl_get_health_info info; - u8 reserved[0x3d]; + u8 validity_flags[2]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 event_sub_type; + u8 reserved[0x2a]; } __packed; union cxl_event { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a0a9007cc1e3..9ebb53f031cd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -900,8 +900,22 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); -void blk_mq_freeze_queue(struct request_queue *q); -void blk_mq_unfreeze_queue(struct 
request_queue *q); +void blk_mq_freeze_queue_nomemsave(struct request_queue *q); +void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q); +static inline unsigned int __must_check +blk_mq_freeze_queue(struct request_queue *q) +{ + unsigned int memflags = memalloc_noio_save(); + + blk_mq_freeze_queue_nomemsave(q); + return memflags; +} +static inline void +blk_mq_unfreeze_queue(struct request_queue *q, unsigned int memflags) +{ + blk_mq_unfreeze_queue_nomemrestore(q); + memalloc_noio_restore(memflags); +} void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 76f0a4e7c2e5..248416ecd01c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,7 +561,6 @@ struct request_queue { struct list_head flush_list; struct mutex sysfs_lock; - struct mutex sysfs_dir_lock; struct mutex limits_lock; /* @@ -605,8 +604,6 @@ struct request_queue { * Serializes all debugfs metadata operations using the above dentries. */ struct mutex debugfs_mutex; - - bool mq_sysfs_init_done; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 2d7d86f0290d..c7f2c63b3bc3 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -504,20 +504,6 @@ struct ceph_mds_request_head_legacy { #define CEPH_MDS_REQUEST_HEAD_VERSION 3 -struct ceph_mds_request_head_old { - __le16 version; /* struct version */ - __le64 oldest_client_tid; - __le32 mdsmap_epoch; /* on client */ - __le32 flags; /* CEPH_MDS_FLAG_* */ - __u8 num_retry, num_fwd; /* count retry, fwd attempts */ - __le16 num_releases; /* # include cap/lease release records */ - __le32 op; /* mds op code */ - __le32 caller_uid, caller_gid; - __le64 ino; /* use this ino for openc, mkdir, mknod, - etc. (if replaying) */ - union ceph_mds_request_args_ext args; -} __attribute__ ((packed)); - struct ceph_mds_request_head { __le16 version; /* struct version */ __le64 oldest_client_tid; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index bdcec1732445..6a0a8f1c7c90 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -77,6 +77,7 @@ extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bff956f7b2b9..9a1a30857763 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -68,16 +68,24 @@ extern const struct qstr dotdot_name; * large memory footprint increase). 
*/ #ifdef CONFIG_64BIT -# define DNAME_INLINE_LEN 40 /* 192 bytes */ +# define DNAME_INLINE_WORDS 5 /* 192 bytes */ #else # ifdef CONFIG_SMP -# define DNAME_INLINE_LEN 36 /* 128 bytes */ +# define DNAME_INLINE_WORDS 9 /* 128 bytes */ # else -# define DNAME_INLINE_LEN 44 /* 128 bytes */ +# define DNAME_INLINE_WORDS 11 /* 128 bytes */ # endif #endif +#define DNAME_INLINE_LEN (DNAME_INLINE_WORDS*sizeof(unsigned long)) + +union shortname_store { + unsigned char string[DNAME_INLINE_LEN]; + unsigned long words[DNAME_INLINE_WORDS]; +}; + #define d_lock d_lockref.lock +#define d_iname d_shortname.string struct dentry { /* RCU lookup touched fields */ @@ -88,7 +96,7 @@ struct dentry { struct qstr d_name; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ - unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ + union shortname_store d_shortname; /* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */ /* Ref lookup also touches following */ @@ -136,7 +144,8 @@ enum d_real_type { }; struct dentry_operations { - int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_revalidate)(struct inode *, const struct qstr *, + struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, @@ -150,6 +159,8 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum d_real_type type); + bool (*d_unalias_trylock)(const struct dentry *); + void (*d_unalias_unlock)(const struct dentry *); } ____cacheline_aligned; /* @@ -589,7 +600,7 @@ static inline struct inode *d_real_inode(const struct dentry *dentry) struct name_snapshot { struct qstr name; - unsigned char inline_name[DNAME_INLINE_LEN]; + union shortname_store inline_name; }; void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 59444b495d49..fa2568b4380d 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -67,21 +67,23 @@ static const struct file_operations __fops = { \ typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); -#if defined(CONFIG_DEBUG_FS) - -struct dentry *debugfs_lookup(const char *name, struct dentry *parent); - struct debugfs_short_fops { ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); loff_t (*llseek) (struct file *, loff_t, int); }; +#if defined(CONFIG_DEBUG_FS) + +struct dentry *debugfs_lookup(const char *name, struct dentry *parent); + struct dentry *debugfs_create_file_full(const char *name, umode_t mode, struct dentry *parent, void *data, + const void *aux, const struct file_operations *fops); struct dentry *debugfs_create_file_short(const char *name, umode_t mode, struct dentry *parent, void *data, + const void *aux, const struct debugfs_short_fops *fops); /** @@ -126,7 +128,15 @@ struct dentry *debugfs_create_file_short(const char *name, umode_t mode, const struct debugfs_short_fops *: debugfs_create_file_short, \ struct file_operations *: debugfs_create_file_full, \ struct debugfs_short_fops *: debugfs_create_file_short) \ - (name, mode, parent, data, fops) + (name, mode, parent, data, NULL, fops) + +#define debugfs_create_file_aux(name, mode, parent, data, aux, fops) \ + _Generic(fops, \ + const 
struct file_operations *: debugfs_create_file_full, \ + const struct debugfs_short_fops *: debugfs_create_file_short, \ + struct file_operations *: debugfs_create_file_full, \ + struct debugfs_short_fops *: debugfs_create_file_short) \ + (name, mode, parent, data, aux, fops) struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, @@ -153,6 +163,7 @@ void debugfs_remove(struct dentry *dentry); void debugfs_lookup_and_remove(const char *name, struct dentry *parent); const struct file_operations *debugfs_real_fops(const struct file *filp); +const void *debugfs_get_aux(const struct file *file); int debugfs_file_get(struct dentry *dentry); void debugfs_file_put(struct dentry *dentry); @@ -164,8 +175,7 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf, ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, size_t len, loff_t *ppos); -struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, - struct dentry *new_dir, const char *new_name); +int debugfs_change_name(struct dentry *dentry, const char *fmt, ...) __printf(2, 3); void debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value); @@ -259,6 +269,14 @@ static inline struct dentry *debugfs_lookup(const char *name, return ERR_PTR(-ENODEV); } +static inline struct dentry *debugfs_create_file_aux(const char *name, + umode_t mode, struct dentry *parent, + void *data, void *aux, + const void *fops) +{ + return ERR_PTR(-ENODEV); +} + static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const void *fops) @@ -312,6 +330,7 @@ static inline void debugfs_lookup_and_remove(const char *name, { } const struct file_operations *debugfs_real_fops(const struct file *filp); +void *debugfs_get_aux(const struct file *file); static inline int debugfs_file_get(struct dentry *dentry) { @@ -341,10 +360,10 @@ static inline ssize_t debugfs_attr_write_signed(struct file *file, return -ENODEV; } -static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, - struct dentry *new_dir, char *new_name) +static inline int __printf(2, 3) debugfs_change_name(struct dentry *dentry, + const char *fmt, ...) 
{ - return ERR_PTR(-ENODEV); + return -ENODEV; } static inline void debugfs_create_u8(const char *name, umode_t mode, @@ -452,6 +471,11 @@ static inline ssize_t debugfs_read_file_str(struct file *file, #endif +#define debugfs_create_file_aux_num(name, mode, parent, data, n, fops) \ + debugfs_create_file_aux(name, mode, parent, data, \ + (void *)(unsigned long)n, fops) +#define debugfs_get_aux_num(f) (unsigned long)debugfs_get_aux(f) + /** * debugfs_create_xul - create a debugfs file that is used to read and write an * unsigned long value, formatted in hexadecimal diff --git a/include/linux/device.h b/include/linux/device.h index 667cb6db9019..80a5b3268986 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -399,7 +399,23 @@ void __iomem *devm_of_iomap(struct device *dev, #endif /* allows to add/remove a custom action to devres stack */ -void devm_remove_action(struct device *dev, void (*action)(void *), void *data); +int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data); + +/** + * devm_remove_action() - removes previously added custom action + * @dev: Device that owns the action + * @action: Function implementing the action + * @data: Pointer to data passed to @action implementation + * + * Removes instance of @action previously added by devm_add_action(). + * Both action and data should match one of the existing entries. + */ +static inline +void devm_remove_action(struct device *dev, void (*action)(void *), void *data) +{ + WARN_ON(devm_remove_action_nowarn(dev, action, data)); +} + void devm_release_action(struct device *dev, void (*action)(void *), void *data); int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); @@ -1074,18 +1090,44 @@ void device_del(struct device *dev); DEFINE_FREE(device_del, struct device *, if (_T) device_del(_T)) -int device_for_each_child(struct device *dev, void *data, - int (*fn)(struct device *dev, void *data)); -int device_for_each_child_reverse(struct device *dev, void *data, - int (*fn)(struct device *dev, void *data)); +int device_for_each_child(struct device *parent, void *data, + device_iter_t fn); +int device_for_each_child_reverse(struct device *parent, void *data, + device_iter_t fn); int device_for_each_child_reverse_from(struct device *parent, - struct device *from, const void *data, - int (*fn)(struct device *, const void *)); -struct device *device_find_child(struct device *dev, void *data, - int (*match)(struct device *dev, void *data)); -struct device *device_find_child_by_name(struct device *parent, - const char *name); -struct device *device_find_any_child(struct device *parent); + struct device *from, void *data, + device_iter_t fn); +struct device *device_find_child(struct device *parent, const void *data, + device_match_t match); +/** + * device_find_child_by_name - device iterator for locating a child device. + * @parent: parent struct device + * @name: name of the child device + * + * This is similar to the device_find_child() function above, but it + * returns a reference to a device that has the name @name. + * + * NOTE: you will need to drop the reference with put_device() after use. + */ +static inline struct device *device_find_child_by_name(struct device *parent, + const char *name) +{ + return device_find_child(parent, name, device_match_name); +} + +/** + * device_find_any_child - device iterator for locating a child device, if any. 
+ * @parent: parent struct device + * + * This is similar to the device_find_child() function above, but it + * returns a reference to a child device, if any. + * + * NOTE: you will need to drop the reference with put_device() after use. + */ +static inline struct device *device_find_any_child(struct device *parent) +{ + return device_find_child(parent, NULL, device_match_any); +} int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index b18658bce2c3..f5a56efd2bd6 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -134,6 +134,7 @@ typedef int (*device_match_t)(struct device *dev, const void *data); /* Generic device matching functions that all busses can use to match with */ int device_match_name(struct device *dev, const void *name); +int device_match_type(struct device *dev, const void *type); int device_match_of_node(struct device *dev, const void *np); int device_match_fwnode(struct device *dev, const void *fwnode); int device_match_devt(struct device *dev, const void *pdevt); @@ -141,9 +142,12 @@ int device_match_acpi_dev(struct device *dev, const void *adev); int device_match_acpi_handle(struct device *dev, const void *handle); int device_match_any(struct device *dev, const void *unused); +/* Device iterating function type for various driver core for_each APIs */ +typedef int (*device_iter_t)(struct device *dev, void *data); + /* iterator helpers for buses */ -int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, - int (*fn)(struct device *dev, void *data)); +int bus_for_each_dev(const struct bus_type *bus, struct device *start, + void *data, device_iter_t fn); struct device *bus_find_device(const struct bus_type *bus, struct device *start, const void *data, device_match_t match); /** diff --git a/include/linux/device/class.h b/include/linux/device/class.h index 518c9c83d64b..45ee3a634999 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -82,18 +82,16 @@ bool class_is_registered(const struct class *class); struct class_compat; struct class_compat *class_compat_register(const char *name); void class_compat_unregister(struct class_compat *cls); -int class_compat_create_link(struct class_compat *cls, struct device *dev, - struct device *device_link); -void class_compat_remove_link(struct class_compat *cls, struct device *dev, - struct device *device_link); +int class_compat_create_link(struct class_compat *cls, struct device *dev); +void class_compat_remove_link(struct class_compat *cls, struct device *dev); void class_dev_iter_init(struct class_dev_iter *iter, const struct class *class, const struct device *start, const struct device_type *type); struct device *class_dev_iter_next(struct class_dev_iter *iter); void class_dev_iter_exit(struct class_dev_iter *iter); -int class_for_each_device(const struct class *class, const struct device *start, void *data, - int (*fn)(struct device *dev, void *data)); +int class_for_each_device(const struct class *class, const struct device *start, + void *data, device_iter_t fn); struct device *class_find_device(const struct class *class, const struct device *start, const void *data, device_match_t match); diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 5c04b8e3833b..cd8e0f0a634b 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -154,7 +154,7 @@ void 
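Since bus_for_each_dev(), class_for_each_device() and the device_for_each_child() family now share the device_iter_t callback type, one callback can be reused across all of these iterators. A small sketch with an illustrative counting callback (names are not taken from the patch):

#include <linux/device.h>

/* Matches device_iter_t: int (*)(struct device *dev, void *data). */
static int sketch_count_one(struct device *dev, void *data)
{
	unsigned int *count = data;

	(*count)++;
	return 0;	/* a non-zero return stops the walk early */
}

static unsigned int sketch_count_children(struct device *parent)
{
	unsigned int count = 0;

	device_for_each_child(parent, &count, sketch_count_one);
	return count;
}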
driver_remove_file(const struct device_driver *driver, int driver_set_override(struct device *dev, const char **override, const char *s, size_t len); int __must_check driver_for_each_device(struct device_driver *drv, struct device *start, - void *data, int (*fn)(struct device *dev, void *)); + void *data, device_iter_t fn); struct device *driver_find_device(const struct device_driver *drv, struct device *start, const void *data, device_match_t match); diff --git a/include/linux/export.h b/include/linux/export.h index 2633df4d31e6..a8c23d945634 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -52,9 +52,24 @@ #else +#ifdef CONFIG_GENDWARFKSYMS +/* + * With CONFIG_GENDWARFKSYMS, ensure the compiler emits debugging + * information for all exported symbols, including those defined in + * different TUs, by adding a __gendwarfksyms_ptr_<symbol> pointer + * that's discarded during the final link. + */ +#define __GENDWARFKSYMS_EXPORT(sym) \ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ + __section(".discard.gendwarfksyms") = &sym; +#else +#define __GENDWARFKSYMS_EXPORT(sym) +#endif + #define __EXPORT_SYMBOL(sym, license, ns) \ extern typeof(sym) sym; \ __ADDRESSABLE(sym) \ + __GENDWARFKSYMS_EXPORT(sym) \ asm(__stringify(___EXPORT_SYMBOL(sym, license, ns))) #endif diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 772f822dc6b8..18855cb44b1c 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -192,7 +192,8 @@ struct fscrypt_operations { unsigned int *num_devs); }; -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); +int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags); static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) @@ -711,8 +712,8 @@ static inline u64 fscrypt_fname_siphash(const struct inode *dir, return 0; } -static inline int fscrypt_d_revalidate(struct dentry *dentry, - unsigned int flags) +static inline int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { return 1; } diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index c90ec889bfc2..99f30c7d6208 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -438,21 +438,21 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, extern const struct bus_type fsl_mc_bus_type; -extern struct device_type fsl_mc_bus_dprc_type; -extern struct device_type fsl_mc_bus_dpni_type; -extern struct device_type fsl_mc_bus_dpio_type; -extern struct device_type fsl_mc_bus_dpsw_type; -extern struct device_type fsl_mc_bus_dpbp_type; -extern struct device_type fsl_mc_bus_dpcon_type; -extern struct device_type fsl_mc_bus_dpmcp_type; -extern struct device_type fsl_mc_bus_dpmac_type; -extern struct device_type fsl_mc_bus_dprtc_type; -extern struct device_type fsl_mc_bus_dpseci_type; -extern struct device_type fsl_mc_bus_dpdmux_type; -extern struct device_type fsl_mc_bus_dpdcei_type; -extern struct device_type fsl_mc_bus_dpaiop_type; -extern struct device_type fsl_mc_bus_dpci_type; -extern struct device_type fsl_mc_bus_dpdmai_type; +extern const struct device_type fsl_mc_bus_dprc_type; +extern const struct device_type fsl_mc_bus_dpni_type; +extern const struct device_type fsl_mc_bus_dpio_type; +extern const struct device_type fsl_mc_bus_dpsw_type; +extern const struct device_type fsl_mc_bus_dpbp_type; +extern const struct device_type fsl_mc_bus_dpcon_type; +extern const struct 
device_type fsl_mc_bus_dpmcp_type; +extern const struct device_type fsl_mc_bus_dpmac_type; +extern const struct device_type fsl_mc_bus_dprtc_type; +extern const struct device_type fsl_mc_bus_dpseci_type; +extern const struct device_type fsl_mc_bus_dpdmux_type; +extern const struct device_type fsl_mc_bus_dpdcei_type; +extern const struct device_type fsl_mc_bus_dpaiop_type; +extern const struct device_type fsl_mc_bus_dpci_type; +extern const struct device_type fsl_mc_bus_dpdmai_type; static inline bool is_fsl_mc_bus_dprc(const struct fsl_mc_device *mc_dev) { diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index a3ce553413de..abd0c8bd950b 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -19,8 +19,8 @@ struct io_uring_cmd { }; struct io_uring_cmd_data { - struct io_uring_sqe sqes[2]; void *op_data; + struct io_uring_sqe sqes[2]; }; static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 623d8e798a11..3def525a1da3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -222,7 +222,8 @@ struct io_alloc_cache { void **entries; unsigned int nr_cached; unsigned int max_cached; - size_t elem_size; + unsigned int elem_size; + unsigned int init_clear; }; struct io_ring_ctx { diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index be707748e7ce..150fe2ae1b6b 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -52,8 +52,6 @@ const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj); bool kobj_ns_current_may_mount(enum kobj_ns_type type); void *kobj_ns_grab_current(enum kobj_ns_type type); -const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); -const void *kobj_ns_initial(enum kobj_ns_type type); void kobj_ns_drop(enum kobj_ns_type type, void *ns); #endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5f1b2dc788e2..6b27db7f9496 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1406,6 +1406,7 @@ enum tlb_flush_reason { TLB_LOCAL_SHOOTDOWN, TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, + TLB_REMOTE_WRONG_CPU, NR_TLB_FLUSH_REASONS, }; diff --git a/include/linux/module.h b/include/linux/module.h index 37eb5d88f6eb..23792d5d7b74 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -430,7 +430,7 @@ struct module { /* Exported symbols */ const struct kernel_symbol *syms; - const s32 *crcs; + const u32 *crcs; unsigned int num_syms; #ifdef CONFIG_ARCH_USES_CFI_TRAPS @@ -448,7 +448,7 @@ struct module { /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; - const s32 *gpl_crcs; + const u32 *gpl_crcs; bool using_gplonly_symbols; #ifdef CONFIG_MODULE_SIG diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8da4c61f97b9..2a59034a5fa2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1085,8 +1085,8 @@ struct netdev_net_notifier { * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); * Old-style ioctl entry point. This is used internally by the - * appletalk and ieee802154 subsystems but is no longer called by - * the device ioctl handler. + * ieee802154 subsystem but is no longer called by the device + * ioctl handler. 
* * int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); * Used by the bonding driver for its device specific ioctls: diff --git a/include/linux/nfs_common.h b/include/linux/nfs_common.h index 5fc02df88252..a541c3a02887 100644 --- a/include/linux/nfs_common.h +++ b/include/linux/nfs_common.h @@ -9,9 +9,10 @@ #include <uapi/linux/nfs.h> /* Mapping from NFS error code to "errno" error code. */ -#define errno_NFSERR_IO EIO int nfs_stat_to_errno(enum nfs_stat status); int nfs4_stat_to_errno(int stat); +__u32 nfs_localio_errno_to_nfs4_stat(int errno); + #endif /* _LINUX_NFS_COMMON_H */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 039898d70954..67ae2c3f41d2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -77,6 +77,23 @@ struct nfs_lock_context { struct rcu_head rcu_head; }; +struct nfs_file_localio { + struct nfsd_file __rcu *ro_file; + struct nfsd_file __rcu *rw_file; + struct list_head list; + void __rcu *nfs_uuid; /* opaque pointer to 'nfs_uuid_t' */ +}; + +static inline void nfs_localio_file_init(struct nfs_file_localio *nfl) +{ +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + nfl->ro_file = NULL; + nfl->rw_file = NULL; + INIT_LIST_HEAD(&nfl->list); + nfl->nfs_uuid = NULL; +#endif +} + struct nfs4_state; struct nfs_open_context { struct nfs_lock_context lock_context; @@ -87,15 +104,16 @@ struct nfs_open_context { struct nfs4_state *state; fmode_t mode; + int error; unsigned long flags; #define NFS_CONTEXT_BAD (2) #define NFS_CONTEXT_UNLOCK (3) #define NFS_CONTEXT_FILE_OPEN (4) - int error; - struct list_head list; struct nfs4_threshold *mdsthreshold; + struct list_head list; struct rcu_head rcu_head; + struct nfs_file_localio nfl; }; struct nfs_open_dir_context { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b804346a9741..f00bfcee7120 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -50,7 +50,6 @@ struct nfs_client { #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ #define NFS_CS_PNFS 9 /* - Server used for pnfs */ -#define NFS_CS_LOCAL_IO 10 /* - client is local */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -132,7 +131,7 @@ struct nfs_client { struct timespec64 cl_nfssvc_boot; seqlock_t cl_boot_lock; nfs_uuid_t cl_uuid; - spinlock_t cl_localio_lock; + struct work_struct cl_local_probe_work; #endif /* CONFIG_NFS_LOCALIO */ }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e74a87bb18a4..9155a6ffc370 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1632,6 +1632,7 @@ enum { NFS_IOHDR_RESEND_PNFS, NFS_IOHDR_RESEND_MDS, NFS_IOHDR_UNSTABLE_WRITES, + NFS_IOHDR_ODIRECT, }; struct nfs_io_completion; @@ -1780,7 +1781,7 @@ struct nfs_rpc_ops { struct nfs_fattr *, struct inode *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); - int (*lookup) (struct inode *, struct dentry *, + int (*lookup) (struct inode *, struct dentry *, const struct qstr *, struct nfs_fh *, struct nfs_fattr *); int (*lookupp) (struct inode *, struct nfs_fh *, struct nfs_fattr *); diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 9202f4b24343..9aa8a43843d7 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -6,9 +6,6 @@ #ifndef __LINUX_NFSLOCALIO_H #define __LINUX_NFSLOCALIO_H -/* nfsd_file structure is purposely kept opaque to NFS client */ -struct nfsd_file; - #if 
IS_ENABLED(CONFIG_NFS_LOCALIO) #include <linux/module.h> @@ -19,6 +16,9 @@ struct nfsd_file; #include <linux/nfs.h> #include <net/net_namespace.h> +struct nfs_client; +struct nfs_file_localio; + /* * Useful to allow a client to negotiate if localio * possible with its server. @@ -27,28 +27,38 @@ struct nfsd_file; */ typedef struct { uuid_t uuid; + unsigned nfs3_localio_probe_count; + /* this struct is over a cacheline, avoid bouncing */ + spinlock_t ____cacheline_aligned lock; struct list_head list; + spinlock_t *list_lock; /* nn->local_clients_lock */ struct net __rcu *net; /* nfsd's network namespace */ struct auth_domain *dom; /* auth_domain for localio */ + /* Local files to close when net is shut down or exports change */ + struct list_head files; } nfs_uuid_t; void nfs_uuid_init(nfs_uuid_t *); bool nfs_uuid_begin(nfs_uuid_t *); void nfs_uuid_end(nfs_uuid_t *); -void nfs_uuid_is_local(const uuid_t *, struct list_head *, +void nfs_uuid_is_local(const uuid_t *, struct list_head *, spinlock_t *, struct net *, struct auth_domain *, struct module *); -void nfs_uuid_invalidate_clients(struct list_head *list); -void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid); + +void nfs_localio_enable_client(struct nfs_client *clp); +void nfs_localio_disable_client(struct nfs_client *clp); +void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, + spinlock_t *nn_local_clients_lock); /* localio needs to map filehandle -> struct nfsd_file */ extern struct nfsd_file * nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *, const struct cred *, const struct nfs_fh *, const fmode_t) __must_hold(rcu); +void nfs_close_local_fh(struct nfs_file_localio *); struct nfsd_localio_operations { - bool (*nfsd_serv_try_get)(struct net *); - void (*nfsd_serv_put)(struct net *); + bool (*nfsd_net_try_get)(struct net *); + void (*nfsd_net_put)(struct net *); struct nfsd_file *(*nfsd_open_local_fh)(struct net *, struct auth_domain *, struct rpc_clnt *, @@ -56,6 +66,8 @@ struct nfsd_localio_operations { const struct nfs_fh *, const fmode_t); struct net *(*nfsd_file_put_local)(struct nfsd_file *); + struct nfsd_file *(*nfsd_file_get)(struct nfsd_file *); + void (*nfsd_file_put)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; @@ -64,17 +76,18 @@ extern const struct nfsd_localio_operations *nfs_to; struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, struct rpc_clnt *, const struct cred *, - const struct nfs_fh *, const fmode_t); + const struct nfs_fh *, struct nfs_file_localio *, + const fmode_t); static inline void nfs_to_nfsd_net_put(struct net *net) { /* - * Once reference to nfsd_serv is dropped, NFSD could be - * unloaded, so ensure safe return from nfsd_file_put_local() - * by always taking RCU. + * Once reference to net (and associated nfsd_serv) is dropped, NFSD + * could be unloaded, so ensure safe return from nfsd_net_put() by + * always taking RCU. 
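The reshaped localio interface above pairs nfs_open_local_fh(), which now also takes the per-open struct nfs_file_localio, with nfs_close_local_fh(). The following is only a rough illustration of that pairing under CONFIG_NFS_LOCALIO, with every argument assumed to be supplied by a hypothetical caller; it is not the actual fs/nfs/localio.c code:

/* Hypothetical LOCALIO user: open a local nfsd_file for reading, then drop it. */
static void sketch_localio_read(nfs_uuid_t *uuid, struct rpc_clnt *clnt,
				const struct cred *cred, const struct nfs_fh *fh,
				struct nfs_file_localio *nfl)
{
	struct nfsd_file *nf;

	nfs_localio_file_init(nfl);	/* reset ro_file/rw_file and the list head */

	nf = nfs_open_local_fh(uuid, clnt, cred, fh, nfl, FMODE_READ);
	if (IS_ERR_OR_NULL(nf))
		return;

	/* ... perform local I/O through nfs_to->nfsd_file_file(nf) ... */

	nfs_close_local_fh(nfl);	/* close the files cached in @nfl */
}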
*/ rcu_read_lock(); - nfs_to->nfsd_serv_put(net); + nfs_to->nfsd_net_put(net); rcu_read_unlock(); } @@ -91,12 +104,19 @@ static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) } #else /* CONFIG_NFS_LOCALIO */ + +struct nfs_file_localio; +static inline void nfs_close_local_fh(struct nfs_file_localio *nfl) +{ +} static inline void nfsd_localio_ops_init(void) { } -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +struct nfs_client; +static inline void nfs_localio_disable_client(struct nfs_client *clp) { } + #endif /* CONFIG_NFS_LOCALIO */ #endif /* __LINUX_NFSLOCALIO_H */ diff --git a/include/linux/pm.h b/include/linux/pm.h index 08c37b83fea8..78855d794342 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -384,12 +384,8 @@ const struct dev_pm_ops name = { \ #ifdef CONFIG_PM #define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) -#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, "ns") #else #define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) -#define EXPORT_PM_FN_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) #endif #ifdef CONFIG_PM_SLEEP @@ -684,6 +680,7 @@ struct dev_pm_info { bool no_pm_callbacks:1; /* Owned by the PM core */ bool async_in_progress:1; /* Owned by the PM core */ bool must_resume:1; /* Owned by the PM core */ + bool set_active:1; /* Owned by the PM core */ bool may_skip_resume:1; /* Set by subsystems */ #else bool should_wakeup:1; diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index bd9836690da6..2d6c30317792 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -54,6 +54,8 @@ struct sdw_slave; #define SDW_MAX_PORTS 15 #define SDW_VALID_PORT_RANGE(n) ((n) < SDW_MAX_PORTS && (n) >= 1) +#define SDW_MAX_LANES 8 + enum { SDW_PORT_DIRN_SINK = 0, SDW_PORT_DIRN_SOURCE, @@ -356,6 +358,7 @@ struct sdw_dpn_prop { * and masks are supported * @commit_register_supported: is PCP_Commit register supported * @scp_int1_mask: SCP_INT1_MASK desired settings + * @lane_maps: Lane mapping for the slave, only valid if lane_control_support is set * @clock_reg_supported: the Peripheral implements the clock base and scale * registers introduced with the SoundWire 1.2 specification. SDCA devices * do not need to set this boolean property as the registers are required. @@ -385,6 +388,7 @@ struct sdw_slave_prop { u32 sdca_interrupt_register_list; u8 commit_register_supported; u8 scp_int1_mask; + u8 lane_maps[SDW_MAX_LANES]; bool clock_reg_supported; bool use_domain_irq; }; @@ -450,6 +454,7 @@ struct sdw_master_prop { int sdw_master_read_prop(struct sdw_bus *bus); int sdw_slave_read_prop(struct sdw_slave *slave); +int sdw_slave_read_lane_mapping(struct sdw_slave *slave); /* * SDW Slave Structures and APIs @@ -850,77 +855,6 @@ struct sdw_master_ops { int dev_num); }; -/** - * struct sdw_bus - SoundWire bus - * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. - * @md: Master device - * @bus_lock_key: bus lock key associated to @bus_lock - * @bus_lock: bus lock - * @slaves: list of Slaves on this bus - * @msg_lock_key: message lock key associated to @msg_lock - * @msg_lock: message lock - * @m_rt_list: List of Master instance of all stream(s) running on Bus. 
This - * is used to compute and program bus bandwidth, clock, frame shape, - * transport and port parameters - * @defer_msg: Defer message - * @params: Current bus parameters - * @stream_refcount: number of streams currently using this bus - * @ops: Master callback ops - * @port_ops: Master port callback ops - * @prop: Master properties - * @vendor_specific_prop: pointer to non-standard properties - * @hw_sync_min_links: Number of links used by a stream above which - * hardware-based synchronization is required. This value is only - * meaningful if multi_link is set. If set to 1, hardware-based - * synchronization will be used even if a stream only uses a single - * SoundWire segment. - * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. - * @link_id: Link id number, can be 0 to N, unique for each Controller - * @id: bus system-wide unique id - * @compute_params: points to Bus resource management implementation - * @assigned: Bitmap for Slave device numbers. - * Bit set implies used number, bit clear implies unused number. - * @clk_stop_timeout: Clock stop timeout computed - * @bank_switch_timeout: Bank switch timeout computed - * @domain: IRQ domain - * @irq_chip: IRQ chip - * @debugfs: Bus debugfs (optional) - * @multi_link: Store bus property that indicates if multi links - * are supported. This flag is populated by drivers after reading - * appropriate firmware (ACPI/DT). - */ -struct sdw_bus { - struct device *dev; - struct sdw_master_device *md; - struct lock_class_key bus_lock_key; - struct mutex bus_lock; - struct list_head slaves; - struct lock_class_key msg_lock_key; - struct mutex msg_lock; - struct list_head m_rt_list; - struct sdw_defer defer_msg; - struct sdw_bus_params params; - int stream_refcount; - const struct sdw_master_ops *ops; - const struct sdw_master_port_ops *port_ops; - struct sdw_master_prop prop; - void *vendor_specific_prop; - int hw_sync_min_links; - int controller_id; - unsigned int link_id; - int id; - int (*compute_params)(struct sdw_bus *bus); - DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); - unsigned int clk_stop_timeout; - u32 bank_switch_timeout; - struct irq_chip irq_chip; - struct irq_domain *domain; -#ifdef CONFIG_DEBUG_FS - struct dentry *debugfs; -#endif - bool multi_link; -}; - int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, struct fwnode_handle *fwnode); void sdw_bus_master_delete(struct sdw_bus *bus); @@ -1010,10 +944,83 @@ struct sdw_stream_runtime { struct list_head master_list; }; +/** + * struct sdw_bus - SoundWire bus + * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. + * @md: Master device + * @bus_lock_key: bus lock key associated to @bus_lock + * @bus_lock: bus lock + * @slaves: list of Slaves on this bus + * @msg_lock_key: message lock key associated to @msg_lock + * @msg_lock: message lock + * @m_rt_list: List of Master instance of all stream(s) running on Bus. This + * is used to compute and program bus bandwidth, clock, frame shape, + * transport and port parameters + * @defer_msg: Defer message + * @params: Current bus parameters + * @stream_refcount: number of streams currently using this bus + * @ops: Master callback ops + * @port_ops: Master port callback ops + * @prop: Master properties + * @vendor_specific_prop: pointer to non-standard properties + * @hw_sync_min_links: Number of links used by a stream above which + * hardware-based synchronization is required. This value is only + * meaningful if multi_link is set. 
If set to 1, hardware-based + * synchronization will be used even if a stream only uses a single + * SoundWire segment. + * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. + * @link_id: Link id number, can be 0 to N, unique for each Controller + * @id: bus system-wide unique id + * @compute_params: points to Bus resource management implementation + * @assigned: Bitmap for Slave device numbers. + * Bit set implies used number, bit clear implies unused number. + * @clk_stop_timeout: Clock stop timeout computed + * @bank_switch_timeout: Bank switch timeout computed + * @domain: IRQ domain + * @irq_chip: IRQ chip + * @debugfs: Bus debugfs (optional) + * @multi_link: Store bus property that indicates if multi links + * are supported. This flag is populated by drivers after reading + * appropriate firmware (ACPI/DT). + * @lane_used_bandwidth: how much bandwidth in bits per second is used by each lane + */ +struct sdw_bus { + struct device *dev; + struct sdw_master_device *md; + struct lock_class_key bus_lock_key; + struct mutex bus_lock; + struct list_head slaves; + struct lock_class_key msg_lock_key; + struct mutex msg_lock; + struct list_head m_rt_list; + struct sdw_defer defer_msg; + struct sdw_bus_params params; + int stream_refcount; + const struct sdw_master_ops *ops; + const struct sdw_master_port_ops *port_ops; + struct sdw_master_prop prop; + void *vendor_specific_prop; + int hw_sync_min_links; + int controller_id; + unsigned int link_id; + int id; + int (*compute_params)(struct sdw_bus *bus, struct sdw_stream_runtime *stream); + DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); + unsigned int clk_stop_timeout; + u32 bank_switch_timeout; + struct irq_chip irq_chip; + struct irq_domain *domain; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs; +#endif + bool multi_link; + unsigned int lane_used_bandwidth[SDW_MAX_LANES]; +}; + struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name); void sdw_release_stream(struct sdw_stream_runtime *stream); -int sdw_compute_params(struct sdw_bus *bus); +int sdw_compute_params(struct sdw_bus *bus, struct sdw_stream_runtime *stream); int sdw_stream_add_master(struct sdw_bus *bus, struct sdw_stream_config *stream_config, @@ -1034,6 +1041,7 @@ int sdw_bus_exit_clk_stop(struct sdw_bus *bus); int sdw_compare_devid(struct sdw_slave *slave, struct sdw_slave_id id); void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id); +bool is_clock_scaling_supported_by_slave(struct sdw_slave *slave); #if IS_ENABLED(CONFIG_SOUNDWIRE) @@ -1045,6 +1053,8 @@ int sdw_stream_add_slave(struct sdw_slave *slave, int sdw_stream_remove_slave(struct sdw_slave *slave, struct sdw_stream_runtime *stream); +int sdw_slave_get_scale_index(struct sdw_slave *slave, u8 *base); + /* messaging and data APIs */ int sdw_read(struct sdw_slave *slave, u32 addr); int sdw_write(struct sdw_slave *slave, u32 addr, u8 value); diff --git a/include/linux/stat.h b/include/linux/stat.h index 9d8382e23a9c..be7496a6a0dd 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -50,11 +50,11 @@ struct kstat { struct timespec64 btime; /* File creation time */ u64 blocks; u64 mnt_id; + u64 change_cookie; + u64 subvol; u32 dio_mem_align; u32 dio_offset_align; u32 dio_read_offset_align; - u64 change_cookie; - u64 subvol; u32 atomic_write_unit_min; u32 atomic_write_unit_max; u32 atomic_write_segments_max; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 5321585c778f..fec976e58174 100644 --- 
a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -93,6 +93,7 @@ struct rpc_clnt { const struct cred *cl_cred; unsigned int cl_max_connect; /* max number of transports not to the same IP */ struct super_block *pipefs_sb; + atomic_t cl_task_count; }; /* diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 0f2fcd244523..18f7e1fd093c 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -293,7 +293,7 @@ __ATTRIBUTE_GROUPS(_name) #define BIN_ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ - .bin_attrs = _name##_attrs, \ + .bin_attrs_new = _name##_attrs, \ }; \ __ATTRIBUTE_GROUPS(_name) @@ -511,7 +511,7 @@ __printf(3, 4) int sysfs_emit_at(char *buf, int at, const char *fmt, ...); ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count); #else /* CONFIG_SYSFS */ @@ -774,7 +774,7 @@ static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...) static inline ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index ed4cd114180a..7f405672b089 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -237,7 +237,6 @@ struct page_pool { struct { struct hlist_node list; u64 detach_time; - u32 napi_id; u32 id; } user; }; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4b0677e48190..ed4b83696c77 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1268,9 +1268,19 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (xo) { x = xfrm_input_state(skb); - if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) - return (xo->flags & CRYPTO_DONE) && - (xo->status & CRYPTO_SUCCESS); + if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) { + bool check = (xo->flags & CRYPTO_DONE) && + (xo->status & CRYPTO_SUCCESS); + + /* The packets here are plain ones and secpath was + * needed to indicate that hardware already handled + * them and there is no need to do nothing in addition. + * + * Consume secpath which was set by drivers. 
+ */ + secpath_reset(skb); + return check; + } } return __xfrm_check_nopolicy(net, skb, dir) || diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 5474494a1e99..76de2b662f4f 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -493,8 +493,8 @@ extern void iscsi_destroy_all_flashnode(struct Scsi_Host *shost); extern int iscsi_flashnode_bus_match(struct device *dev, const struct device_driver *drv); extern struct device * -iscsi_find_flashnode_sess(struct Scsi_Host *shost, void *data, - int (*fn)(struct device *dev, void *data)); +iscsi_find_flashnode_sess(struct Scsi_Host *shost, const void *data, + device_match_t fn); extern struct device * iscsi_find_flashnode_conn(struct iscsi_bus_flash_session *fnode_sess); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index f1e99458e29e..5e0eb41d967e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -220,6 +220,15 @@ * * 7.41 * - add FUSE_ALLOW_IDMAP + * 7.42 + * - Add FUSE_OVER_IO_URING and all other io-uring related flags and data + * structures: + * - struct fuse_uring_ent_in_out + * - struct fuse_uring_req_header + * - struct fuse_uring_cmd_req + * - FUSE_URING_IN_OUT_HEADER_SZ + * - FUSE_URING_OP_IN_OUT_SZ + * - enum fuse_uring_cmd */ #ifndef _LINUX_FUSE_H @@ -255,7 +264,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 41 +#define FUSE_KERNEL_MINOR_VERSION 42 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -425,6 +434,7 @@ struct fuse_file_lock { * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit * of the request ID indicates resend requests * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts + * FUSE_OVER_IO_URING: Indicate that client supports io-uring */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -471,6 +481,7 @@ struct fuse_file_lock { /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP #define FUSE_ALLOW_IDMAP (1ULL << 40) +#define FUSE_OVER_IO_URING (1ULL << 41) /** * CUSE INIT request/reply flags @@ -1206,4 +1217,67 @@ struct fuse_supp_groups { uint32_t groups[]; }; +/** + * Size of the ring buffer header + */ +#define FUSE_URING_IN_OUT_HEADER_SZ 128 +#define FUSE_URING_OP_IN_OUT_SZ 128 + +/* Used as part of the fuse_uring_req_header */ +struct fuse_uring_ent_in_out { + uint64_t flags; + + /* + * commit ID to be used in a reply to a ring request (see also + * struct fuse_uring_cmd_req) + */ + uint64_t commit_id; + + /* size of user payload buffer */ + uint32_t payload_sz; + uint32_t padding; + + uint64_t reserved; +}; + +/** + * Header for all fuse-io-uring requests + */ +struct fuse_uring_req_header { + /* struct fuse_in_header / struct fuse_out_header */ + char in_out[FUSE_URING_IN_OUT_HEADER_SZ]; + + /* per op code header */ + char op_in[FUSE_URING_OP_IN_OUT_SZ]; + + struct fuse_uring_ent_in_out ring_ent_in_out; +}; + +/** + * sqe commands to the kernel + */ +enum fuse_uring_cmd { + FUSE_IO_URING_CMD_INVALID = 0, + + /* register the request buffer and fetch a fuse request */ + FUSE_IO_URING_CMD_REGISTER = 1, + + /* commit fuse request result and fetch next request */ + FUSE_IO_URING_CMD_COMMIT_AND_FETCH = 2, +}; + +/** + * In the 80B command area of the SQE. 
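For the new fuse-over-io_uring opcodes, a server submits IORING_OP_URING_CMD SQEs on the fuse device fd, placing a struct fuse_uring_cmd_req (defined just below) in the big-SQE command area. A rough userspace sketch, assuming a ring set up with IORING_SETUP_SQE128 and an already-opened /dev/fuse descriptor; mapping the request buffers is omitted:

#include <string.h>
#include <liburing.h>
#include <linux/fuse.h>

/* Hypothetical helper: queue a FUSE_IO_URING_CMD_REGISTER for ring queue @qid. */
static void sketch_fuse_uring_register(struct io_uring *ring, int fuse_fd,
				       unsigned int qid)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct fuse_uring_cmd_req req = { .qid = qid };

	if (!sqe)
		return;
	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fuse_fd, NULL, 0, 0);
	sqe->cmd_op = FUSE_IO_URING_CMD_REGISTER;
	memcpy(sqe->cmd, &req, sizeof(req));	/* 80-byte command area of the 128-byte SQE */
}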
+ */ +struct fuse_uring_cmd_req { + uint64_t flags; + + /* entry identifier for commits */ + uint64_t commit_id; + + /* queue the command is for (queue index) */ + uint16_t qid; + uint8_t padding[6]; +}; + #endif /* _LINUX_FUSE_H */ diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h index e1571603175e..aa872a41ffb9 100644 --- a/include/uapi/mtd/ubi-user.h +++ b/include/uapi/mtd/ubi-user.h @@ -175,6 +175,8 @@ #define UBI_IOCRPEB _IOW(UBI_IOC_MAGIC, 4, __s32) /* Force scrubbing on the specified PEB */ #define UBI_IOCSPEB _IOW(UBI_IOC_MAGIC, 5, __s32) +/* Read detailed device erase counter information */ +#define UBI_IOCECNFO _IOWR(UBI_IOC_MAGIC, 6, struct ubi_ecinfo_req) /* ioctl commands of the UBI control character device */ @@ -413,6 +415,37 @@ struct ubi_rnvol_req { } __packed; /** + * struct ubi_ecinfo_req - a data structure used for requesting and receiving + * erase block counter information from a UBI device. + * + * @start: index of first physical erase block to read (in) + * @length: number of erase counters to read (in) + * @read_length: number of erase counters that was actually read (out) + * @padding: reserved for future, not used, has to be zeroed + * @erase_counters: array of erase counter values (out) + * + * This structure is used to retrieve erase counter information for a specified + * range of PEBs on a UBI device. + * Erase counters are read from @start and attempts to read @length number of + * erase counters. + * The retrieved values are stored in the @erase_counters array. It is the + * responsibility of the caller to allocate enough memory for storing @length + * elements in the @erase_counters array. + * If a block is bad or if the erase counter is unknown the corresponding value + * in the array will be set to -1. + * The @read_length field will indicate the number of erase counters actually + * read. Typically @read_length will be limited due to memory or the number of + * PEBs on the UBI device. + */ +struct ubi_ecinfo_req { + __s32 start; + __s32 length; + __s32 read_length; + __s8 padding[16]; + __s32 erase_counters[]; +} __packed; + +/** * struct ubi_leb_change_req - a data structure used in atomic LEB change * requests. 
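The ubi_ecinfo_req kerneldoc above spells out the calling convention for the new UBI_IOCECNFO ioctl: the caller allocates the flexible erase_counters[] array itself and checks read_length afterwards. A hedged userspace sketch; the device node and counter count are illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <mtd/ubi-user.h>

int main(void)
{
	int want = 64;	/* request the first 64 erase counters */
	struct ubi_ecinfo_req *req;
	int fd;

	fd = open("/dev/ubi0", O_RDONLY);	/* hypothetical UBI device node */
	if (fd < 0)
		return 1;

	/* caller provides room for @want counters; padding must stay zeroed */
	req = calloc(1, sizeof(*req) + want * sizeof(req->erase_counters[0]));
	if (!req)
		return 1;
	req->start = 0;
	req->length = want;

	if (ioctl(fd, UBI_IOCECNFO, req) == 0) {
		for (int i = 0; i < req->read_length; i++)
			printf("PEB %d: ec=%d\n", req->start + i, req->erase_counters[i]);
	}

	free(req);
	close(fd);
	return 0;
}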
* @lnum: logical eraseblock number to change diff --git a/init/Kconfig b/init/Kconfig index 4dbc059d2de5..d0d021b3fa3b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1969,7 +1969,8 @@ config RUST bool "Rust support" depends on HAVE_RUST depends on RUST_IS_AVAILABLE - depends on !MODVERSIONS + select EXTENDED_MODVERSIONS if MODVERSIONS + depends on !MODVERSIONS || GENDWARFKSYMS depends on !GCC_PLUGIN_RANDSTRUCT depends on !RANDSTRUCT depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index f86ef92a6c46..f6867bad0d78 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -21,7 +21,7 @@ phys_addr_t phys_initrd_start __initdata; unsigned long phys_initrd_size __initdata; #ifdef CONFIG_SYSCTL -static struct ctl_table kern_do_mounts_initrd_table[] = { +static const struct ctl_table kern_do_mounts_initrd_table[] = { { .procname = "real-root-dev", .data = &real_root_dev, diff --git a/io_uring/Makefile b/io_uring/Makefile index 53167bef37d7..d695b60dba4f 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \ sync.o msg_ring.o advise.o openclose.o \ epoll.o statx.o timeout.o fdinfo.o \ cancel.o waitid.o register.o \ - truncate.o memmap.o + truncate.o memmap.o alloc_cache.o obj-$(CONFIG_IO_WQ) += io-wq.o obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o diff --git a/io_uring/alloc_cache.c b/io_uring/alloc_cache.c new file mode 100644 index 000000000000..58423888b736 --- /dev/null +++ b/io_uring/alloc_cache.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "alloc_cache.h" + +void io_alloc_cache_free(struct io_alloc_cache *cache, + void (*free)(const void *)) +{ + void *entry; + + if (!cache->entries) + return; + + while ((entry = io_alloc_cache_get(cache)) != NULL) + free(entry); + + kvfree(cache->entries); + cache->entries = NULL; +} + +/* returns false if the cache was initialized properly */ +bool io_alloc_cache_init(struct io_alloc_cache *cache, + unsigned max_nr, unsigned int size, + unsigned int init_bytes) +{ + cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL); + if (!cache->entries) + return true; + + cache->nr_cached = 0; + cache->max_cached = max_nr; + cache->elem_size = size; + cache->init_clear = init_bytes; + return false; +} + +void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp) +{ + void *obj; + + obj = kmalloc(cache->elem_size, gfp); + if (obj && cache->init_clear) + memset(obj, 0, cache->init_clear); + return obj; +} diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h index a3a8cfec32ce..0dd17d8ba93a 100644 --- a/io_uring/alloc_cache.h +++ b/io_uring/alloc_cache.h @@ -1,11 +1,30 @@ #ifndef IOU_ALLOC_CACHE_H #define IOU_ALLOC_CACHE_H +#include <linux/io_uring_types.h> + /* * Don't allow the cache to grow beyond this size. 
*/ #define IO_ALLOC_CACHE_MAX 128 +void io_alloc_cache_free(struct io_alloc_cache *cache, + void (*free)(const void *)); +bool io_alloc_cache_init(struct io_alloc_cache *cache, + unsigned max_nr, unsigned int size, + unsigned int init_bytes); + +void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp); + +static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr) +{ + if (IS_ENABLED(CONFIG_KASAN)) { + kfree(*iov); + *iov = NULL; + *nr = 0; + } +} + static inline bool io_alloc_cache_put(struct io_alloc_cache *cache, void *entry) { @@ -23,52 +42,30 @@ static inline void *io_alloc_cache_get(struct io_alloc_cache *cache) if (cache->nr_cached) { void *entry = cache->entries[--cache->nr_cached]; + /* + * If KASAN is enabled, always clear the initial bytes that + * must be zeroed post alloc, in case any of them overlap + * with KASAN storage. + */ +#if defined(CONFIG_KASAN) kasan_mempool_unpoison_object(entry, cache->elem_size); + if (cache->init_clear) + memset(entry, 0, cache->init_clear); +#endif return entry; } return NULL; } -static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp, - void (*init_once)(void *obj)) +static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp) { - if (unlikely(!cache->nr_cached)) { - void *obj = kmalloc(cache->elem_size, gfp); + void *obj; - if (obj && init_once) - init_once(obj); + obj = io_alloc_cache_get(cache); + if (obj) return obj; - } - return io_alloc_cache_get(cache); + return io_cache_alloc_new(cache, gfp); } -/* returns false if the cache was initialized properly */ -static inline bool io_alloc_cache_init(struct io_alloc_cache *cache, - unsigned max_nr, size_t size) -{ - cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL); - if (cache->entries) { - cache->nr_cached = 0; - cache->max_cached = max_nr; - cache->elem_size = size; - return false; - } - return true; -} - -static inline void io_alloc_cache_free(struct io_alloc_cache *cache, - void (*free)(const void *)) -{ - void *entry; - - if (!cache->entries) - return; - - while ((entry = io_alloc_cache_get(cache)) != NULL) - free(entry); - - kvfree(cache->entries); - cache->entries = NULL; -} #endif diff --git a/io_uring/filetable.c b/io_uring/filetable.c index a21660e3145a..dd8eeec97acf 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, if (slot_index >= ctx->file_table.data.nr) return -EINVAL; - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); + node = io_rsrc_node_alloc(IORING_RSRC_FILE); if (!node) return -ENOMEM; diff --git a/io_uring/futex.c b/io_uring/futex.c index 30139cc150f2..3159a2b7eeca 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -36,7 +36,7 @@ struct io_futex_data { bool io_futex_cache_init(struct io_ring_ctx *ctx) { return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX, - sizeof(struct io_futex_data)); + sizeof(struct io_futex_data), 0); } void io_futex_cache_free(struct io_ring_ctx *ctx) @@ -320,7 +320,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) } io_ring_submit_lock(ctx, issue_flags); - ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT, NULL); + ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT); if (!ifd) { ret = -ENOMEM; goto done_unlock; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 7bfbc7c22367..ceacf6230e34 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -157,7 +157,7 @@ static int __read_mostly sysctl_io_uring_disabled; 
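io_alloc_cache_init() now carries an init_bytes argument describing how much of a freshly allocated object must be zeroed, which replaces the old per-callsite init_once callbacks. A minimal sketch of the intended round trip with a hypothetical cached type; as with the io_uring users, objects are assumed to go back to the cache with the leading region already reset:

#include <linux/slab.h>
#include "alloc_cache.h"

struct sketch_obj {
	void	*cached_ptr;	/* must be NULL when the object is brand new */
	char	scratch[64];	/* overwritten per use, no zeroing needed */
};

static struct io_alloc_cache sketch_cache;

static int sketch_setup(void)
{
	/* returns false on success, true if the entries array could not be allocated */
	return io_alloc_cache_init(&sketch_cache, IO_ALLOC_CACHE_MAX,
				   sizeof(struct sketch_obj),
				   offsetof(struct sketch_obj, scratch));
}

static void sketch_use(void)
{
	struct sketch_obj *obj = io_cache_alloc(&sketch_cache, GFP_KERNEL);

	if (!obj)
		return;
	/* a fresh allocation has its first init_bytes zeroed by io_cache_alloc_new() */
	if (!io_alloc_cache_put(&sketch_cache, obj))
		kfree(obj);	/* cache already full, free it ourselves */
}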
static int __read_mostly sysctl_io_uring_group = -1; #ifdef CONFIG_SYSCTL -static struct ctl_table kernel_io_uring_disabled_table[] = { +static const struct ctl_table kernel_io_uring_disabled_table[] = { { .procname = "io_uring_disabled", .data = &sysctl_io_uring_disabled, @@ -315,16 +315,18 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->cq_overflow_list); INIT_LIST_HEAD(&ctx->io_buffers_cache); ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX, - sizeof(struct async_poll)); + sizeof(struct async_poll), 0); ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_async_msghdr)); + sizeof(struct io_async_msghdr), + offsetof(struct io_async_msghdr, clear)); ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_async_rw)); + sizeof(struct io_async_rw), + offsetof(struct io_async_rw, clear)); ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_uring_cmd_data)); + sizeof(struct io_uring_cmd_data), 0); spin_lock_init(&ctx->msg_lock); ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_kiocb)); + sizeof(struct io_kiocb), 0); ret |= io_futex_cache_init(ctx); if (ret) goto free_ref; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index f65e3f3ede51..ab619e63ef39 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -226,21 +226,16 @@ static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags) } static inline void *io_uring_alloc_async_data(struct io_alloc_cache *cache, - struct io_kiocb *req, - void (*init_once)(void *obj)) + struct io_kiocb *req) { - req->async_data = io_cache_alloc(cache, GFP_KERNEL, init_once); - if (req->async_data) - req->flags |= REQ_F_ASYNC_DATA; - return req->async_data; -} + if (cache) { + req->async_data = io_cache_alloc(cache, GFP_KERNEL); + } else { + const struct io_issue_def *def = &io_issue_defs[req->opcode]; -static inline void *io_uring_alloc_async_data_nocache(struct io_kiocb *req) -{ - const struct io_issue_def *def = &io_issue_defs[req->opcode]; - - WARN_ON_ONCE(!def->async_size); - req->async_data = kmalloc(def->async_size, GFP_KERNEL); + WARN_ON_ONCE(!def->async_size); + req->async_data = kmalloc(def->async_size, GFP_KERNEL); + } if (req->async_data) req->flags |= REQ_F_ASYNC_DATA; return req->async_data; diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index bd3cd78d2dba..7e6f68e911f1 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -89,8 +89,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts) static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, int res, u32 cflags, u64 user_data) { - req->tctx = READ_ONCE(ctx->submitter_task->io_uring); - if (!req->tctx) { + if (!READ_ONCE(ctx->submitter_task)) { kmem_cache_free(req_cachep, req); return -EOWNERDEAD; } @@ -98,6 +97,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, io_req_set_res(req, res, cflags); percpu_ref_get(&ctx->refs); req->ctx = ctx; + req->tctx = NULL; req->io_task_work.func = io_msg_tw_complete; io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE); return 0; diff --git a/io_uring/net.c b/io_uring/net.c index 85f55fbc25c9..17852a6616ff 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -137,7 +137,6 @@ static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg) static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr 
*hdr = req->async_data; - struct iovec *iov; /* can't recycle, ensure we free the iovec if we have one */ if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { @@ -146,44 +145,30 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) } /* Let normal cleanup path reap it if we fail adding to the cache */ - iov = hdr->free_iov; + io_alloc_cache_kasan(&hdr->free_iov, &hdr->free_iov_nr); if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) { - if (iov) - kasan_mempool_poison_object(iov); req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; } } -static void io_msg_async_data_init(void *obj) -{ - struct io_async_msghdr *hdr = (struct io_async_msghdr *)obj; - - hdr->free_iov = NULL; - hdr->free_iov_nr = 0; -} - static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_async_msghdr *hdr; - hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req, - io_msg_async_data_init); + hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req); if (!hdr) return NULL; /* If the async data was cached, we might have an iov cached inside. */ - if (hdr->free_iov) { - kasan_mempool_unpoison_object(hdr->free_iov, - hdr->free_iov_nr * sizeof(struct iovec)); + if (hdr->free_iov) req->flags |= REQ_F_NEED_CLEANUP; - } return hdr; } /* assign new iovec to kmsg, if we need to */ -static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, +static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, struct iovec *iov) { if (iov) { @@ -193,7 +178,6 @@ static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, kfree(kmsg->free_iov); kmsg->free_iov = iov; } - return 0; } static inline void io_mshot_prep_retry(struct io_kiocb *req, @@ -255,7 +239,8 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req, if (unlikely(ret < 0)) return ret; - return io_net_vec_assign(req, iomsg, iov); + io_net_vec_assign(req, iomsg, iov); + return 0; } #endif @@ -295,11 +280,12 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, ret = -EINVAL; goto ua_end; } else { + struct iovec __user *uiov = msg->msg_iov; + /* we only need the length for provided buffers */ - if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t))) + if (!access_ok(&uiov->iov_len, sizeof(uiov->iov_len))) goto ua_end; - unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len, - ua_end); + unsafe_get_user(iov->iov_len, &uiov->iov_len, ua_end); sr->len = iov->iov_len; } ret = 0; @@ -314,7 +300,8 @@ ua_end: if (unlikely(ret < 0)) return ret; - return io_net_vec_assign(req, iomsg, iov); + io_net_vec_assign(req, iomsg, iov); + return 0; } static int io_sendmsg_copy_hdr(struct io_kiocb *req, @@ -579,6 +566,54 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } +static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags, + struct io_async_msghdr *kmsg) +{ + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + + int ret; + struct buf_sel_arg arg = { + .iovs = &kmsg->fast_iov, + .max_len = min_not_zero(sr->len, INT_MAX), + .nr_iovs = 1, + }; + + if (kmsg->free_iov) { + arg.nr_iovs = kmsg->free_iov_nr; + arg.iovs = kmsg->free_iov; + arg.mode = KBUF_MODE_FREE; + } + + if (!(sr->flags & IORING_RECVSEND_BUNDLE)) + arg.nr_iovs = 1; + else + arg.mode |= KBUF_MODE_EXPAND; + + ret = io_buffers_select(req, &arg, issue_flags); + if (unlikely(ret < 0)) + return ret; + + if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { + 
kmsg->free_iov_nr = ret; + kmsg->free_iov = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; + } + sr->len = arg.out_len; + + if (ret == 1) { + sr->buf = arg.iovs[0].iov_base; + ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, + &kmsg->msg.msg_iter); + if (unlikely(ret)) + return ret; + } else { + iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, + arg.iovs, ret, arg.out_len); + } + + return 0; +} + int io_send(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); @@ -602,44 +637,9 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) retry_bundle: if (io_do_buffer_select(req)) { - struct buf_sel_arg arg = { - .iovs = &kmsg->fast_iov, - .max_len = min_not_zero(sr->len, INT_MAX), - .nr_iovs = 1, - }; - - if (kmsg->free_iov) { - arg.nr_iovs = kmsg->free_iov_nr; - arg.iovs = kmsg->free_iov; - arg.mode = KBUF_MODE_FREE; - } - - if (!(sr->flags & IORING_RECVSEND_BUNDLE)) - arg.nr_iovs = 1; - else - arg.mode |= KBUF_MODE_EXPAND; - - ret = io_buffers_select(req, &arg, issue_flags); - if (unlikely(ret < 0)) + ret = io_send_select_buffer(req, issue_flags, kmsg); + if (ret) return ret; - - if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { - kmsg->free_iov_nr = ret; - kmsg->free_iov = arg.iovs; - req->flags |= REQ_F_NEED_CLEANUP; - } - sr->len = arg.out_len; - - if (ret == 1) { - sr->buf = arg.iovs[0].iov_base; - ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, - &kmsg->msg.msg_iter); - if (unlikely(ret)) - return ret; - } else { - iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, - arg.iovs, ret, arg.out_len); - } } /* @@ -1710,6 +1710,11 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) int ret; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + if (unlikely(req->flags & REQ_F_FAIL)) { + ret = -ECONNRESET; + goto out; + } + file_flags = force_nonblock ? 
O_NONBLOCK : 0; ret = __sys_connect_file(req->file, &io->addr, connect->addr_len, @@ -1813,11 +1818,8 @@ void io_netmsg_cache_free(const void *entry) { struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry; - if (kmsg->free_iov) { - kasan_mempool_unpoison_object(kmsg->free_iov, - kmsg->free_iov_nr * sizeof(struct iovec)); + if (kmsg->free_iov) io_netmsg_iovec_free(kmsg); - } kfree(kmsg); } #endif diff --git a/io_uring/net.h b/io_uring/net.h index 52bfee05f06a..b804c2b36e60 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -5,16 +5,20 @@ struct io_async_msghdr { #if defined(CONFIG_NET) - struct iovec fast_iov; - /* points to an allocated iov, if NULL we use fast_iov instead */ struct iovec *free_iov; + /* points to an allocated iov, if NULL we use fast_iov instead */ int free_iov_nr; - int namelen; - __kernel_size_t controllen; - __kernel_size_t payloadlen; - struct sockaddr __user *uaddr; - struct msghdr msg; - struct sockaddr_storage addr; + struct_group(clear, + int namelen; + struct iovec fast_iov; + __kernel_size_t controllen; + __kernel_size_t payloadlen; + struct sockaddr __user *uaddr; + struct msghdr msg; + struct sockaddr_storage addr; + ); +#else + struct_group(clear); #endif }; diff --git a/io_uring/poll.c b/io_uring/poll.c index cc01c40b43d3..bb1c0cd4f809 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -273,6 +273,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts) return IOU_POLL_REISSUE; } } + if (unlikely(req->cqe.res & EPOLLERR)) + req_set_fail(req); if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; @@ -315,8 +317,10 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts) ret = io_poll_check_events(req, ts); if (ret == IOU_POLL_NO_ACTION) { + io_kbuf_recycle(req, 0); return; } else if (ret == IOU_POLL_REQUEUE) { + io_kbuf_recycle(req, 0); __io_poll_execute(req, 0); return; } @@ -650,7 +654,7 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req, kfree(apoll->double_poll); } else { if (!(issue_flags & IO_URING_F_UNLOCKED)) - apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC, NULL); + apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC); else apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); if (!apoll) diff --git a/io_uring/register.c b/io_uring/register.c index 05025047d1da..9a4d2fbce4ae 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -552,7 +552,7 @@ overflow: ctx->cqe_cached = ctx->cqe_sentinel = NULL; WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped)); - WRITE_ONCE(n.rings->sq_flags, READ_ONCE(o.rings->sq_flags)); + atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags)); WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags)); WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow)); @@ -853,6 +853,8 @@ struct file *io_uring_register_get_file(unsigned int fd, bool registered) return ERR_PTR(-EINVAL); fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); file = tctx->registered_rings[fd]; + if (file) + get_file(file); } else { file = fget(fd); } @@ -919,7 +921,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr, ctx->buf_table.nr, ret); mutex_unlock(&ctx->uring_lock); - if (!use_registered_ring) - fput(file); + + fput(file); return ret; } diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index e32ac5853391..af39b69eb4fd 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -118,7 +118,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct 
io_rsrc_node *node) } } -struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type) +struct io_rsrc_node *io_rsrc_node_alloc(int type) { struct io_rsrc_node *node; @@ -203,7 +203,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); + node = io_rsrc_node_alloc(IORING_RSRC_FILE); if (!node) { err = -ENOMEM; fput(file); @@ -444,8 +444,6 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags) void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node) { - lockdep_assert_held(&ctx->uring_lock); - if (node->tag) io_post_aux_cqe(ctx, node->tag, 0, 0); @@ -525,7 +523,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, goto fail; } ret = -ENOMEM; - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); + node = io_rsrc_node_alloc(IORING_RSRC_FILE); if (!node) { fput(file); goto fail; @@ -730,7 +728,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, if (!iov->iov_base) return NULL; - node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); + node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); if (!node) return ERR_PTR(-ENOMEM); node->buf = NULL; @@ -921,6 +919,16 @@ int io_import_fixed(int ddir, struct iov_iter *iter, return 0; } +/* Lock two rings at once. The rings must be different! */ +static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2) +{ + if (ctx1 > ctx2) + swap(ctx1, ctx2); + mutex_lock(&ctx1->uring_lock); + mutex_lock_nested(&ctx2->uring_lock, SINGLE_DEPTH_NESTING); +} + +/* Both rings are locked by the caller. */ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx, struct io_uring_clone_buffers *arg) { @@ -928,6 +936,9 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx int i, ret, off, nr; unsigned int nbufs; + lockdep_assert_held(&ctx->uring_lock); + lockdep_assert_held(&src_ctx->uring_lock); + /* * Accounting state is shared between the two rings; that only works if * both rings are accounted towards the same counters. @@ -942,7 +953,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE)) return -EBUSY; - nbufs = READ_ONCE(src_ctx->buf_table.nr); + nbufs = src_ctx->buf_table.nr; if (!arg->nr) arg->nr = nbufs; else if (arg->nr > nbufs) @@ -966,27 +977,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx } } - /* - * Drop our own lock here. We'll setup the data we need and reference - * the source buffers, then re-grab, check, and assign at the end. 
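io_clone_buffers() no longer drops and re-takes ring locks in the middle of the operation; the caller now locks both rings up front with lock_two_rings(), which orders the two mutexes by address so that two tasks cloning in opposite directions cannot deadlock. The same idiom in isolation, applied to a hypothetical pair of objects:

#include <linux/mutex.h>
#include <linux/minmax.h>

struct sketch_ring {
	struct mutex lock;
};

/* Always take the lower-addressed lock first; the second acquisition is
 * annotated for lockdep because both locks share the same class. */
static void sketch_lock_pair(struct sketch_ring *a, struct sketch_ring *b)
{
	if (a > b)
		swap(a, b);
	mutex_lock(&a->lock);
	mutex_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
}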
- */ - mutex_unlock(&ctx->uring_lock); - - mutex_lock(&src_ctx->uring_lock); ret = -ENXIO; nbufs = src_ctx->buf_table.nr; if (!nbufs) - goto out_unlock; + goto out_free; ret = -EINVAL; if (!arg->nr) arg->nr = nbufs; else if (arg->nr > nbufs) - goto out_unlock; + goto out_free; ret = -EOVERFLOW; if (check_add_overflow(arg->nr, arg->src_off, &off)) - goto out_unlock; + goto out_free; if (off > nbufs) - goto out_unlock; + goto out_free; off = arg->dst_off; i = arg->src_off; @@ -998,10 +1002,10 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx if (!src_node) { dst_node = NULL; } else { - dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); + dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); if (!dst_node) { ret = -ENOMEM; - goto out_unlock; + goto out_free; } refcount_inc(&src_node->buf->refs); @@ -1011,10 +1015,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx i++; } - /* Have a ref on the bufs now, drop src lock and re-grab our own lock */ - mutex_unlock(&src_ctx->uring_lock); - mutex_lock(&ctx->uring_lock); - /* * If asked for replace, put the old table. data->nodes[] holds both * old and new nodes at this point. @@ -1023,24 +1023,17 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx io_rsrc_data_free(ctx, &ctx->buf_table); /* - * ctx->buf_table should be empty now - either the contents are being - * replaced and we just freed the table, or someone raced setting up - * a buffer table while the clone was happening. If not empty, fall - * through to failure handling. + * ctx->buf_table must be empty now - either the contents are being + * replaced and we just freed the table, or the contents are being + * copied to a ring that does not have buffers yet (checked at function + * entry). 
*/ - if (!ctx->buf_table.nr) { - ctx->buf_table = data; - return 0; - } + WARN_ON_ONCE(ctx->buf_table.nr); + ctx->buf_table = data; + return 0; - mutex_unlock(&ctx->uring_lock); - mutex_lock(&src_ctx->uring_lock); - /* someone raced setting up buffers, dump ours */ - ret = -EBUSY; -out_unlock: +out_free: io_rsrc_data_free(ctx, &data); - mutex_unlock(&src_ctx->uring_lock); - mutex_lock(&ctx->uring_lock); return ret; } @@ -1054,6 +1047,7 @@ out_unlock: int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_clone_buffers buf; + struct io_ring_ctx *src_ctx; bool registered_src; struct file *file; int ret; @@ -1071,8 +1065,18 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg) file = io_uring_register_get_file(buf.src_fd, registered_src); if (IS_ERR(file)) return PTR_ERR(file); - ret = io_clone_buffers(ctx, file->private_data, &buf); - if (!registered_src) - fput(file); + + src_ctx = file->private_data; + if (src_ctx != ctx) { + mutex_unlock(&ctx->uring_lock); + lock_two_rings(ctx, src_ctx); + } + + ret = io_clone_buffers(ctx, src_ctx, &buf); + + if (src_ctx != ctx) + mutex_unlock(&src_ctx->uring_lock); + + fput(file); return ret; } diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index c8b093584461..190f7ee45de9 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -2,6 +2,8 @@ #ifndef IOU_RSRC_H #define IOU_RSRC_H +#include <linux/lockdep.h> + #define IO_NODE_ALLOC_CACHE_MAX 32 #define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) @@ -43,7 +45,7 @@ struct io_imu_folio_data { unsigned int nr_folios; }; -struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type); +struct io_rsrc_node *io_rsrc_node_alloc(int type); void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node); void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data); int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr); @@ -80,6 +82,7 @@ static inline struct io_rsrc_node *io_rsrc_node_lookup(struct io_rsrc_data *data static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node) { + lockdep_assert_held(&ctx->uring_lock); if (node && !--node->refs) io_free_rsrc_node(ctx, node); } diff --git a/io_uring/rw.c b/io_uring/rw.c index a9a2733be842..7aa1e4c9f64a 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -146,28 +146,15 @@ static inline int io_import_iovec(int rw, struct io_kiocb *req, return 0; } -static void io_rw_iovec_free(struct io_async_rw *rw) -{ - if (rw->free_iovec) { - kfree(rw->free_iovec); - rw->free_iov_nr = 0; - rw->free_iovec = NULL; - } -} - static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_rw *rw = req->async_data; - struct iovec *iov; - if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { - io_rw_iovec_free(rw); + if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) return; - } - iov = rw->free_iovec; + + io_alloc_cache_kasan(&rw->free_iovec, &rw->free_iov_nr); if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) { - if (iov) - kasan_mempool_poison_object(iov); req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; } @@ -208,27 +195,16 @@ static void io_req_rw_cleanup(struct io_kiocb *req, unsigned int issue_flags) } } -static void io_rw_async_data_init(void *obj) -{ - struct io_async_rw *rw = (struct io_async_rw *)obj; - - rw->free_iovec = NULL; - rw->bytes_done = 0; -} - static int io_rw_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_async_rw *rw; - rw = io_uring_alloc_async_data(&ctx->rw_cache, req, 
io_rw_async_data_init); + rw = io_uring_alloc_async_data(&ctx->rw_cache, req); if (!rw) return -ENOMEM; - if (rw->free_iovec) { - kasan_mempool_unpoison_object(rw->free_iovec, - rw->free_iov_nr * sizeof(struct iovec)); + if (rw->free_iovec) req->flags |= REQ_F_NEED_CLEANUP; - } rw->bytes_done = 0; return 0; } @@ -1323,10 +1299,7 @@ void io_rw_cache_free(const void *entry) { struct io_async_rw *rw = (struct io_async_rw *) entry; - if (rw->free_iovec) { - kasan_mempool_unpoison_object(rw->free_iovec, - rw->free_iov_nr * sizeof(struct iovec)); - io_rw_iovec_free(rw); - } + if (rw->free_iovec) + kfree(rw->free_iovec); kfree(rw); } diff --git a/io_uring/rw.h b/io_uring/rw.h index 2d7656bd268d..eaa59bd64870 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -9,19 +9,24 @@ struct io_meta_state { struct io_async_rw { size_t bytes_done; - struct iov_iter iter; - struct iov_iter_state iter_state; - struct iovec fast_iov; struct iovec *free_iovec; - int free_iov_nr; - /* wpq is for buffered io, while meta fields are used with direct io */ - union { - struct wait_page_queue wpq; - struct { - struct uio_meta meta; - struct io_meta_state meta_state; + struct_group(clear, + struct iov_iter iter; + struct iov_iter_state iter_state; + struct iovec fast_iov; + int free_iov_nr; + /* + * wpq is for buffered io, while meta fields are used with + * direct io + */ + union { + struct wait_page_queue wpq; + struct { + struct uio_meta meta; + struct io_meta_state meta_state; + }; }; - }; + ); }; int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe); diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 2bd7e0a317bb..48fc8cf70784 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -544,7 +544,7 @@ static int __io_timeout_prep(struct io_kiocb *req, if (WARN_ON_ONCE(req_has_async_data(req))) return -EFAULT; - data = io_uring_alloc_async_data_nocache(req); + data = io_uring_alloc_async_data(NULL, req); if (!data) return -ENOMEM; data->req = req; diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index fc94c465a985..1f6a82128b47 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -168,23 +168,16 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2, } EXPORT_SYMBOL_GPL(io_uring_cmd_done); -static void io_uring_cmd_init_once(void *obj) -{ - struct io_uring_cmd_data *data = obj; - - data->op_data = NULL; -} - static int io_uring_cmd_prep_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct io_uring_cmd_data *cache; - cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req, - io_uring_cmd_init_once); + cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req); if (!cache) return -ENOMEM; + cache->op_data = NULL; if (!(req->flags & REQ_F_FORCE_ASYNC)) { /* defer memcpy until we need it */ @@ -192,8 +185,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, return 0; } - memcpy(req->async_data, sqe, uring_sqe_size(req->ctx)); - ioucmd->sqe = req->async_data; + memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx)); + ioucmd->sqe = cache->sqes; return 0; } @@ -260,7 +253,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) struct io_uring_cmd_data *cache = req->async_data; if (ioucmd->sqe != (void *) cache) - memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx)); + memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); return -EAGAIN; } else if (ret == -EIOCBQUEUED) { return -EIOCBQUEUED; @@ -350,7 +343,7 @@ int 
io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags) if (!prot || !prot->ioctl) return -EOPNOTSUPP; - switch (cmd->sqe->cmd_op) { + switch (cmd->cmd_op) { case SOCKET_URING_OP_SIOCINQ: ret = prot->ioctl(sk, SIOCINQ, &arg); if (ret) diff --git a/io_uring/waitid.c b/io_uring/waitid.c index 6778c0ee76c4..853e97a7b0ec 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -303,7 +303,7 @@ int io_waitid(struct io_kiocb *req, unsigned int issue_flags) struct io_waitid_async *iwa; int ret; - iwa = io_uring_alloc_async_data_nocache(req); + iwa = io_uring_alloc_async_data(NULL, req); if (!iwa) return -ENOMEM; diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 54318e0b4557..15b17e86e198 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -73,7 +73,7 @@ int ipc_mni = IPCMNI; int ipc_mni_shift = IPCMNI_SHIFT; int ipc_min_cycle = RADIX_TREE_MAP_SIZE; -static struct ctl_table ipc_sysctls[] = { +static const struct ctl_table ipc_sysctls[] = { { .procname = "shmmax", .data = &init_ipc_ns.shm_ctlmax, diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index b70dc2ff22d8..0dd12e1c9f53 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -20,7 +20,7 @@ static int msg_max_limit_max = HARD_MSGMAX; static int msg_maxsize_limit_min = MIN_MSGSIZEMAX; static int msg_maxsize_limit_max = HARD_MSGSIZEMAX; -static struct ctl_table mq_sysctls[] = { +static const struct ctl_table mq_sysctls[] = { { .procname = "queues_max", .data = &init_ipc_ns.mq_queues_max, diff --git a/kernel/acct.c b/kernel/acct.c index 179848ad33e9..31222e8cd534 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -76,7 +76,7 @@ static int acct_parm[3] = {4, 2, 30}; #define ACCT_TIMEOUT (acct_parm[2]) /* foo second timeout between checks */ #ifdef CONFIG_SYSCTL -static struct ctl_table kern_acct_table[] = { +static const struct ctl_table kern_acct_table[] = { { .procname = "acct", .data = &acct_parm, diff --git a/kernel/audit.c b/kernel/audit.c index 13d0144efaa3..5f5bf85bcc90 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1221,7 +1221,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; struct audit_sig_info *sig_data; - struct lsm_context lsmctx; + struct lsm_context lsmctx = { NULL, 0, 0 }; err = audit_netlink_ok(skb, msg_type); if (err) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 8396ce1d0fba..9de6acddd479 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -8001,17 +8001,6 @@ struct btf_module { static LIST_HEAD(btf_modules); static DEFINE_MUTEX(btf_module_mutex); -static ssize_t -btf_module_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t len) -{ - const struct btf *btf = bin_attr->private; - - memcpy(buf, btf->data + off, len); - return len; -} - static void purge_cand_cache(struct btf *btf); static int btf_module_notify(struct notifier_block *nb, unsigned long op, @@ -8072,8 +8061,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, attr->attr.name = btf->name; attr->attr.mode = 0444; attr->size = btf->data_size; - attr->private = btf; - attr->read = btf_module_read; + attr->private = btf->data; + attr->read_new = sysfs_bin_attr_simple_read; err = sysfs_create_bin_file(btf_kobj, attr); if (err) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0daf098e3207..c420edbfb7c8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -6128,7 +6128,7 @@ static int bpf_unpriv_handler(const struct ctl_table *table, int 
write, return ret; } -static struct ctl_table bpf_syscall_table[] = { +static const struct ctl_table bpf_syscall_table[] = { { .procname = "unprivileged_bpf_disabled", .data = &sysctl_unprivileged_bpf_disabled, diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c index fedb54c94cdb..81d6cf90584a 100644 --- a/kernel/bpf/sysfs_btf.c +++ b/kernel/bpf/sysfs_btf.c @@ -12,24 +12,16 @@ extern char __start_BTF[]; extern char __stop_BTF[]; -static ssize_t -btf_vmlinux_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t len) -{ - memcpy(buf, __start_BTF + off, len); - return len; -} - static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = { .attr = { .name = "vmlinux", .mode = 0444, }, - .read = btf_vmlinux_read, + .read_new = sysfs_bin_attr_simple_read, }; struct kobject *btf_kobj; static int __init btf_vmlinux_init(void) { + bin_attr_btf_vmlinux.private = __start_BTF; bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF; if (bin_attr_btf_vmlinux.size == 0) diff --git a/kernel/delayacct.c b/kernel/delayacct.c index b238eb8c6573..eb63a021ac04 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -64,7 +64,7 @@ static int sysctl_delayacct(const struct ctl_table *table, int write, void *buff return err; } -static struct ctl_table kern_delayacct_table[] = { +static const struct ctl_table kern_delayacct_table[] = { { .procname = "task_delayacct", .data = NULL, diff --git a/kernel/exit.c b/kernel/exit.c index 1dcddfe537ee..3485e5fc499e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -85,7 +85,7 @@ static unsigned int oops_limit = 10000; #ifdef CONFIG_SYSCTL -static struct ctl_table kern_exit_table[] = { +static const struct ctl_table kern_exit_table[] = { { .procname = "oops_limit", .data = &oops_limit, diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 7e1340da5aca..00529c81cc40 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -7,20 +7,13 @@ set -e sfile="$(readlink -f "$0")" outdir="$(pwd)" tarfile=$1 -cpio_dir=$outdir/${tarfile%/*}/.tmp_cpio_dir +tmpdir=$outdir/${tarfile%/*}/.tmp_dir dir_list=" include/ arch/$SRCARCH/include/ " -if ! command -v cpio >/dev/null; then - echo >&2 "***" - echo >&2 "*** 'cpio' could not be found." - echo >&2 "***" - exit 1 -fi - # Support incremental builds by skipping archive generation # if timestamps of files being archived are not changed. @@ -48,9 +41,9 @@ all_dirs="$all_dirs $dir_list" # check include/generated/autoconf.h explicitly. # # Ignore them for md5 calculation to avoid pointless regeneration. -headers_md5="$(find $all_dirs -name "*.h" | - grep -v "include/generated/utsversion.h" | - grep -v "include/generated/autoconf.h" | +headers_md5="$(find $all_dirs -name "*.h" -a \ + ! -path include/generated/utsversion.h -a \ + ! -path include/generated/autoconf.h | xargs ls -l | md5sum | cut -d ' ' -f1)" # Any changes to this script will also cause a rebuild of the archive. @@ -65,36 +58,43 @@ fi echo " GEN $tarfile" -rm -rf $cpio_dir -mkdir $cpio_dir +rm -rf "${tmpdir}" +mkdir "${tmpdir}" if [ "$building_out_of_srctree" ]; then ( cd $srctree for f in $dir_list do find "$f" -name "*.h"; - done | cpio --quiet -pd $cpio_dir + done | tar -c -f - -T - | tar -xf - -C "${tmpdir}" ) fi -# The second CPIO can complain if files already exist which can happen with out -# of tree builds having stale headers in srctree. Just silence CPIO for now. 
for f in $dir_list; do find "$f" -name "*.h"; -done | cpio --quiet -pdu $cpio_dir >/dev/null 2>&1 +done | tar -c -f - -T - | tar -xf - -C "${tmpdir}" + +# Always exclude include/generated/utsversion.h +# Otherwise, the contents of the tarball may vary depending on the build steps. +rm -f "${tmpdir}/include/generated/utsversion.h" # Remove comments except SDPX lines -find $cpio_dir -type f -print0 | - xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' +# Use a temporary file to store directory contents to prevent find/xargs from +# seeing temporary files created by perl. +find "${tmpdir}" -type f -print0 > "${tmpdir}.contents.txt" +xargs -0 -P8 -n1 \ + perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' \ + < "${tmpdir}.contents.txt" +rm -f "${tmpdir}.contents.txt" # Create archive and try to normalize metadata for reproducibility. tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ --exclude=".__afs*" --exclude=".nfs*" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ - -I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null + -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null echo $headers_md5 > kernel/kheaders.md5 echo "$this_file_md5" >> kernel/kheaders.md5 echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5 -rm -rf $cpio_dir +rm -rf "${tmpdir}" diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 953169893a95..04efa7a6e69b 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -274,7 +274,7 @@ static int proc_dohung_task_timeout_secs(const struct ctl_table *table, int writ * and hung_task_check_interval_secs */ static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ); -static struct ctl_table hung_task_sysctls[] = { +static const struct ctl_table hung_task_sysctls[] = { #ifdef CONFIG_SMP { .procname = "hung_task_all_cpu_backtrace", diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index b424a5c6ae87..c0bdc1686154 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -925,7 +925,7 @@ static int kexec_limit_handler(const struct ctl_table *table, int write, return proc_dointvec(&tmp, write, buffer, lenp, ppos); } -static struct ctl_table kexec_core_sysctls[] = { +static const struct ctl_table kexec_core_sysctls[] = { { .procname = "kexec_load_disabled", .data = &kexec_load_disabled, diff --git a/kernel/kheaders.c b/kernel/kheaders.c index 42163c9e94e5..378088b07f46 100644 --- a/kernel/kheaders.c +++ b/kernel/kheaders.c @@ -29,25 +29,12 @@ asm ( extern char kernel_headers_data[]; extern char kernel_headers_data_end[]; -static ssize_t -ikheaders_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t len) -{ - memcpy(buf, &kernel_headers_data[off], len); - return len; -} - -static struct bin_attribute kheaders_attr __ro_after_init = { - .attr = { - .name = "kheaders.tar.xz", - .mode = 0444, - }, - .read = &ikheaders_read, -}; +static struct bin_attribute kheaders_attr __ro_after_init = + __BIN_ATTR_SIMPLE_RO(kheaders.tar.xz, 0444); static int __init ikheaders_init(void) { + kheaders_attr.private = kernel_headers_data; kheaders_attr.size = (kernel_headers_data_end - kernel_headers_data); return sysfs_create_bin_file(kernel_kobj, &kheaders_attr); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 030569210670..88aeac84e4c0 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -946,7 +946,7 @@ static int proc_kprobes_optimization_handler(const struct ctl_table *table, return ret; } -static struct ctl_table 
kprobe_sysctls[] = { +static const struct ctl_table kprobe_sysctls[] = { { .procname = "kprobes-optimization", .data = &sysctl_kprobes_optimization, diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 1bab21b4718f..eefb67d9883c 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -239,21 +239,7 @@ extern const void __start_notes; extern const void __stop_notes; #define notes_size (&__stop_notes - &__start_notes) -static ssize_t notes_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t count) -{ - memcpy(buf, &__start_notes + off, count); - return count; -} - -static struct bin_attribute notes_attr __ro_after_init = { - .attr = { - .name = "notes", - .mode = S_IRUGO, - }, - .read = ¬es_read, -}; +static __ro_after_init BIN_ATTR_SIMPLE_RO(notes); struct kobject *kernel_kobj; EXPORT_SYMBOL_GPL(kernel_kobj); @@ -307,8 +293,9 @@ static int __init ksysfs_init(void) goto kset_exit; if (notes_size > 0) { - notes_attr.size = notes_size; - error = sysfs_create_bin_file(kernel_kobj, ¬es_attr); + bin_attr_notes.private = (void *)&__start_notes; + bin_attr_notes.size = notes_size; + error = sysfs_create_bin_file(kernel_kobj, &bin_attr_notes); if (error) goto group_exit; } diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 77ee3ea8a573..d4281d1e13a6 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -77,7 +77,7 @@ static int sysctl_latencytop(const struct ctl_table *table, int write, void *buf return err; } -static struct ctl_table latencytop_sysctl[] = { +static const struct ctl_table latencytop_sysctl[] = { { .procname = "latencytop", .data = &latencytop_enabled, diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 29acd238dad7..4470680f0226 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -79,7 +79,7 @@ module_param(lock_stat, int, 0644); #endif #ifdef CONFIG_SYSCTL -static struct ctl_table kern_lockdep_table[] = { +static const struct ctl_table kern_lockdep_table[] = { #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index 74fe976e3b01..d7762ef5949a 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -169,6 +169,36 @@ config MODVERSIONS make them incompatible with the kernel you are running. If unsure, say N. +choice + prompt "Module versioning implementation" + depends on MODVERSIONS + help + Select the tool used to calculate symbol versions for modules. + + If unsure, select GENKSYMS. + +config GENKSYMS + bool "genksyms (from source code)" + help + Calculate symbol versions from pre-processed source code using + genksyms. + + If unsure, say Y. + +config GENDWARFKSYMS + bool "gendwarfksyms (from debugging information)" + depends on DEBUG_INFO + # Requires full debugging information, split DWARF not supported. + depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT + # Requires ELF object files. + depends on !LTO + help + Calculate symbol versions from DWARF debugging information using + gendwarfksyms. Requires DEBUG_INFO to be enabled. + + If unsure, say N. +endchoice + config ASM_MODVERSIONS bool default HAVE_ASM_MODVERSIONS && MODVERSIONS @@ -177,6 +207,31 @@ config ASM_MODVERSIONS assembly. This can be enabled only when the target architecture supports it. +config EXTENDED_MODVERSIONS + bool "Extended Module Versioning Support" + depends on MODVERSIONS + help + This enables extended MODVERSIONs support, allowing long symbol + names to be versioned. 
+ + The most likely reason you would enable this is to enable Rust + support. If unsure, say N. + +config BASIC_MODVERSIONS + bool "Basic Module Versioning Support" + depends on MODVERSIONS + default y + help + This enables basic MODVERSIONS support, allowing older tools or + kernels to potentially load modules. + + Disabling this may cause older `modprobe` or `kmod` to be unable + to read MODVERSIONS information from built modules. With this + disabled, older kernels may treat this module as unversioned. + + This is enabled by default when MODVERSIONS are enabled. + If unsure, say Y. + config MODULE_SRCVERSION_ALL bool "Source checksum for all modules" help diff --git a/kernel/module/internal.h b/kernel/module/internal.h index b35c0ec54a89..d09b46ef032f 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -55,8 +55,8 @@ extern const struct kernel_symbol __start___ksymtab[]; extern const struct kernel_symbol __stop___ksymtab[]; extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; -extern const s32 __start___kcrctab[]; -extern const s32 __start___kcrctab_gpl[]; +extern const u32 __start___kcrctab[]; +extern const u32 __start___kcrctab_gpl[]; struct load_info { const char *name; @@ -86,6 +86,8 @@ struct load_info { unsigned int vers; unsigned int info; unsigned int pcpu; + unsigned int vers_ext_crc; + unsigned int vers_ext_name; } index; }; @@ -102,7 +104,7 @@ struct find_symbol_arg { /* Output */ struct module *owner; - const s32 *crc; + const u32 *crc; const struct kernel_symbol *sym; enum mod_license license; }; @@ -385,16 +387,25 @@ static inline void init_param_lock(struct module *mod) { } #ifdef CONFIG_MODVERSIONS int check_version(const struct load_info *info, - const char *symname, struct module *mod, const s32 *crc); + const char *symname, struct module *mod, const u32 *crc); void module_layout(struct module *mod, struct modversion_info *ver, struct kernel_param *kp, struct kernel_symbol *ks, struct tracepoint * const *tp); int check_modstruct_version(const struct load_info *info, struct module *mod); int same_magic(const char *amagic, const char *bmagic, bool has_crcs); +struct modversion_info_ext { + size_t remaining; + const u32 *crc; + const char *name; +}; +void modversion_ext_start(const struct load_info *info, struct modversion_info_ext *ver); +void modversion_ext_advance(struct modversion_info_ext *ver); +#define for_each_modversion_info_ext(ver, info) \ + for (modversion_ext_start(info, &ver); ver.remaining > 0; modversion_ext_advance(&ver)) #else /* !CONFIG_MODVERSIONS */ static inline int check_version(const struct load_info *info, const char *symname, struct module *mod, - const s32 *crc) + const u32 *crc) { return 1; } diff --git a/kernel/module/main.c b/kernel/module/main.c index 8808b6906d5a..1fb9ad289a6f 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -86,7 +86,7 @@ struct mod_tree_root mod_tree __cacheline_aligned = { struct symsearch { const struct kernel_symbol *start, *stop; - const s32 *crcs; + const u32 *crcs; enum mod_license license; }; @@ -2074,6 +2074,82 @@ static int elf_validity_cache_index_str(struct load_info *info) } /** + * elf_validity_cache_index_versions() - Validate and cache version indices + * @info: Load info to cache version indices in. + * Must have &load_info->sechdrs and &load_info->secstrings populated. 
+ * @flags: Load flags, relevant to suppress version loading, see + * uapi/linux/module.h + * + * If we're ignoring modversions based on @flags, zero all version indices + * and return validity. Othewrise check: + * + * * If "__version_ext_crcs" is present, "__version_ext_names" is present + * * There is a name present for every crc + * + * Then populate: + * + * * &load_info->index.vers + * * &load_info->index.vers_ext_crc + * * &load_info->index.vers_ext_names + * + * if present. + * + * Return: %0 if valid, %-ENOEXEC on failure. + */ +static int elf_validity_cache_index_versions(struct load_info *info, int flags) +{ + unsigned int vers_ext_crc; + unsigned int vers_ext_name; + size_t crc_count; + size_t remaining_len; + size_t name_size; + char *name; + + /* If modversions were suppressed, pretend we didn't find any */ + if (flags & MODULE_INIT_IGNORE_MODVERSIONS) { + info->index.vers = 0; + info->index.vers_ext_crc = 0; + info->index.vers_ext_name = 0; + return 0; + } + + vers_ext_crc = find_sec(info, "__version_ext_crcs"); + vers_ext_name = find_sec(info, "__version_ext_names"); + + /* If we have one field, we must have the other */ + if (!!vers_ext_crc != !!vers_ext_name) { + pr_err("extended version crc+name presence does not match"); + return -ENOEXEC; + } + + /* + * If we have extended version information, we should have the same + * number of entries in every section. + */ + if (vers_ext_crc) { + crc_count = info->sechdrs[vers_ext_crc].sh_size / sizeof(u32); + name = (void *)info->hdr + + info->sechdrs[vers_ext_name].sh_offset; + remaining_len = info->sechdrs[vers_ext_name].sh_size; + + while (crc_count--) { + name_size = strnlen(name, remaining_len) + 1; + if (name_size > remaining_len) { + pr_err("more extended version crcs than names"); + return -ENOEXEC; + } + remaining_len -= name_size; + name += name_size; + } + } + + info->index.vers = find_sec(info, "__versions"); + info->index.vers_ext_crc = vers_ext_crc; + info->index.vers_ext_name = vers_ext_name; + return 0; +} + +/** * elf_validity_cache_index() - Resolve, validate, cache section indices * @info: Load info to read from and update. * &load_info->sechdrs and &load_info->secstrings must be populated. @@ -2087,9 +2163,7 @@ static int elf_validity_cache_index_str(struct load_info *info) * * elf_validity_cache_index_mod() * * elf_validity_cache_index_sym() * * elf_validity_cache_index_str() - * - * If versioning is not suppressed via flags, load the version index from - * a section called "__versions" with no validation. + * * elf_validity_cache_index_versions() * * If CONFIG_SMP is enabled, load the percpu section by name with no * validation. @@ -2112,11 +2186,9 @@ static int elf_validity_cache_index(struct load_info *info, int flags) err = elf_validity_cache_index_str(info); if (err < 0) return err; - - if (flags & MODULE_INIT_IGNORE_MODVERSIONS) - info->index.vers = 0; /* Pretend no __versions section! */ - else - info->index.vers = find_sec(info, "__versions"); + err = elf_validity_cache_index_versions(info, flags); + if (err < 0) + return err; info->index.pcpu = find_pcpusec(info); @@ -2327,6 +2399,10 @@ static int rewrite_section_headers(struct load_info *info, int flags) /* Track but don't keep modinfo and version sections. 
*/ info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; + info->sechdrs[info->index.vers_ext_crc].sh_flags &= + ~(unsigned long)SHF_ALLOC; + info->sechdrs[info->index.vers_ext_name].sh_flags &= + ~(unsigned long)SHF_ALLOC; info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; return 0; diff --git a/kernel/module/sysfs.c b/kernel/module/sysfs.c index f99616499e2e..b401ff4b02d2 100644 --- a/kernel/module/sysfs.c +++ b/kernel/module/sysfs.c @@ -190,7 +190,7 @@ static int add_notes_attrs(struct module *mod, const struct load_info *info) nattr->attr.mode = 0444; nattr->size = info->sechdrs[i].sh_size; nattr->private = (void *)info->sechdrs[i].sh_addr; - nattr->read = sysfs_bin_attr_simple_read; + nattr->read_new = sysfs_bin_attr_simple_read; *(gattr++) = nattr++; } ++loaded; diff --git a/kernel/module/version.c b/kernel/module/version.c index 53f43ac5a73e..3718a8868321 100644 --- a/kernel/module/version.c +++ b/kernel/module/version.c @@ -13,17 +13,34 @@ int check_version(const struct load_info *info, const char *symname, struct module *mod, - const s32 *crc) + const u32 *crc) { Elf_Shdr *sechdrs = info->sechdrs; unsigned int versindex = info->index.vers; unsigned int i, num_versions; struct modversion_info *versions; + struct modversion_info_ext version_ext; /* Exporting module didn't supply crcs? OK, we're already tainted. */ if (!crc) return 1; + /* If we have extended version info, rely on it */ + if (info->index.vers_ext_crc) { + for_each_modversion_info_ext(version_ext, info) { + if (strcmp(version_ext.name, symname) != 0) + continue; + if (*version_ext.crc == *crc) + return 1; + pr_debug("Found checksum %X vs module %X\n", + *crc, *version_ext.crc); + goto bad_version; + } + pr_warn_once("%s: no extended symbol version for %s\n", + info->name, symname); + return 1; + } + /* No versions at all? modprobe --force does this. */ if (versindex == 0) return try_to_force_load(mod, symname) == 0; @@ -87,6 +104,34 @@ int same_magic(const char *amagic, const char *bmagic, return strcmp(amagic, bmagic) == 0; } +void modversion_ext_start(const struct load_info *info, + struct modversion_info_ext *start) +{ + unsigned int crc_idx = info->index.vers_ext_crc; + unsigned int name_idx = info->index.vers_ext_name; + Elf_Shdr *sechdrs = info->sechdrs; + + /* + * Both of these fields are needed for this to be useful + * Any future fields should be initialized to NULL if absent. + */ + if (crc_idx == 0 || name_idx == 0) { + start->remaining = 0; + return; + } + + start->crc = (const u32 *)sechdrs[crc_idx].sh_addr; + start->name = (const char *)sechdrs[name_idx].sh_addr; + start->remaining = sechdrs[crc_idx].sh_size / sizeof(*start->crc); +} + +void modversion_ext_advance(struct modversion_info_ext *vers) +{ + vers->remaining--; + vers->crc++; + vers->name += strlen(vers->name) + 1; +} + /* * Generate the signature for all relevant module structures here. * If these change, we don't want to try to parse the module. 
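As a reading aid for the extended MODVERSIONS format introduced above: the two new ELF sections carry parallel data, __version_ext_crcs being a flat array of u32 CRCs and __version_ext_names a run of NUL-terminated symbol names in the same order, and modversion_ext_start()/modversion_ext_advance() simply step through both in lockstep. The following is a minimal, self-contained userspace sketch of that walk; the iterator and helper names here are illustrative only and are not kernel API.

/*
 * Illustrative sketch (plain userspace C, not kernel code): walk the
 * parallel "__version_ext_crcs" / "__version_ext_names" layout described
 * above.  The CRC blob is an array of u32; the name blob is a sequence of
 * NUL-terminated strings, one per CRC, in the same order.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ext_version_iter {		/* mirrors struct modversion_info_ext */
	size_t remaining;
	const uint32_t *crc;
	const char *name;
};

static void ext_version_start(struct ext_version_iter *it,
			      const uint32_t *crcs, size_t crcs_size,
			      const char *names)
{
	it->remaining = crcs_size / sizeof(*crcs);
	it->crc = crcs;
	it->name = names;
}

static void ext_version_advance(struct ext_version_iter *it)
{
	it->remaining--;
	it->crc++;
	it->name += strlen(it->name) + 1;	/* step past the NUL */
}

int main(void)
{
	/* Example blobs; a real module carries these in its ELF sections. */
	static const uint32_t crcs[] = { 0x12345678, 0x9abcdef0 };
	static const char names[] = "symbol_one\0a_rather_long_rust_symbol_name\0";
	struct ext_version_iter it;

	for (ext_version_start(&it, crcs, sizeof(crcs), names);
	     it.remaining > 0; ext_version_advance(&it))
		printf("%s: crc 0x%08x\n", it.name, (unsigned int)*it.crc);
	return 0;
}

Because the name stream is delimited only by its NULs, the patch validates at load time (elf_validity_cache_index_versions()) that there is one properly terminated name per CRC before either section is trusted.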
diff --git a/kernel/panic.c b/kernel/panic.c index fbc59b3b64d0..d8635d5cecb2 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -84,7 +84,7 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); #ifdef CONFIG_SYSCTL -static struct ctl_table kern_panic_table[] = { +static const struct ctl_table kern_panic_table[] = { #ifdef CONFIG_SMP { .procname = "oops_all_cpu_backtrace", diff --git a/kernel/pid.c b/kernel/pid.c index 3a10a7b6fcf8..924084713be8 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -695,7 +695,7 @@ static struct ctl_table_root pid_table_root = { .set_ownership = pid_table_root_set_ownership, }; -static struct ctl_table pid_table[] = { +static const struct ctl_table pid_table[] = { { .procname = "pid_max", .data = &init_pid_ns.pid_max, diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index f1ffa032fc32..8f6cfec87555 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -303,7 +303,7 @@ static int pid_ns_ctl_handler(const struct ctl_table *table, int write, return ret; } -static struct ctl_table pid_ns_ctl_table[] = { +static const struct ctl_table pid_ns_ctl_table[] = { { .procname = "ns_last_pid", .maxlen = sizeof(int), diff --git a/kernel/pid_sysctl.h b/kernel/pid_sysctl.h index 18ecaef6be41..5d8f981de7c5 100644 --- a/kernel/pid_sysctl.h +++ b/kernel/pid_sysctl.h @@ -31,7 +31,7 @@ static int pid_mfd_noexec_dointvec_minmax(const struct ctl_table *table, return err; } -static struct ctl_table pid_ns_ctl_table_vm[] = { +static const struct ctl_table pid_ns_ctl_table_vm[] = { { .procname = "memfd_noexec", .data = &init_pid_ns.memfd_noexec_scope, diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 1f87aa01ba44..10a01af63a80 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -608,7 +608,11 @@ int hibernation_platform_enter(void) local_irq_disable(); system_state = SYSTEM_SUSPEND; - syscore_suspend(); + + error = syscore_suspend(); + if (error) + goto Enable_irqs; + if (pm_wakeup_pending()) { error = -EAGAIN; goto Power_up; @@ -620,6 +624,7 @@ int hibernation_platform_enter(void) Power_up: syscore_resume(); + Enable_irqs: system_state = SYSTEM_RUNNING; local_irq_enable(); diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c index f5072dc85f7a..da77f3f5c1fe 100644 --- a/kernel/printk/sysctl.c +++ b/kernel/printk/sysctl.c @@ -20,7 +20,7 @@ static int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int writ return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } -static struct ctl_table printk_sysctls[] = { +static const struct ctl_table printk_sysctls[] = { { .procname = "printk", .data = &console_loglevel, diff --git a/kernel/reboot.c b/kernel/reboot.c index a701000bab34..b5a8569e5d81 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -1287,7 +1287,7 @@ static struct attribute *reboot_attrs[] = { }; #ifdef CONFIG_SYSCTL -static struct ctl_table kern_reboot_table[] = { +static const struct ctl_table kern_reboot_table[] = { { .procname = "poweroff_cmd", .data = &poweroff_cmd, diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c index db68a964e34e..83d46b9b8ec8 100644 --- a/kernel/sched/autogroup.c +++ b/kernel/sched/autogroup.c @@ -9,7 +9,7 @@ static struct autogroup autogroup_default; static atomic_t autogroup_seq_nr; #ifdef CONFIG_SYSCTL -static struct ctl_table sched_autogroup_sysctls[] = { +static const struct ctl_table sched_autogroup_sysctls[] = { { .procname = "sched_autogroup_enabled", .data = &sysctl_sched_autogroup_enabled, diff --git 
a/kernel/sched/core.c b/kernel/sched/core.c index 9142a0394d46..165c90ba64ea 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4654,7 +4654,7 @@ static int sysctl_schedstats(const struct ctl_table *table, int write, void *buf #endif /* CONFIG_SCHEDSTATS */ #ifdef CONFIG_SYSCTL -static struct ctl_table sched_core_sysctls[] = { +static const struct ctl_table sched_core_sysctls[] = { #ifdef CONFIG_SCHEDSTATS { .procname = "sched_schedstats", diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index a2a29e3fffca..1a19d69b91ed 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -666,7 +666,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) } sg_policy->thread = thread; - kthread_bind_mask(thread, policy->related_cpus); + if (policy->dvfs_possible_from_any_cpu) + set_cpus_allowed_ptr(thread, policy->related_cpus); + else + kthread_bind_mask(thread, policy->related_cpus); + init_irq_work(&sg_policy->irq_work, sugov_irq_work); mutex_init(&sg_policy->work_lock); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 62192ac79c30..38e4537790af 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -26,7 +26,7 @@ static unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */ static unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */ #ifdef CONFIG_SYSCTL -static struct ctl_table sched_dl_sysctls[] = { +static const struct ctl_table sched_dl_sysctls[] = { { .procname = "sched_deadline_period_max_us", .data = &sysctl_sched_dl_period_max, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1e78caa21436..ce2e94ccad0c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -133,7 +133,7 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; #endif #ifdef CONFIG_SYSCTL -static struct ctl_table sched_fair_sysctls[] = { +static const struct ctl_table sched_fair_sysctls[] = { #ifdef CONFIG_CFS_BANDWIDTH { .procname = "sched_cfs_bandwidth_slice_us", diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index bd66a46b06ac..4b8e33c615b1 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -26,7 +26,7 @@ static int sched_rt_handler(const struct ctl_table *table, int write, void *buff size_t *lenp, loff_t *ppos); static int sched_rr_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -static struct ctl_table sched_rt_sysctls[] = { +static const struct ctl_table sched_rt_sysctls[] = { { .procname = "sched_rt_period_us", .data = &sysctl_sched_rt_period, diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index 149e2c8036d3..456d339be98f 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -1130,6 +1130,13 @@ int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask) return 0; /* + * The special/sugov task isn't part of regular bandwidth/admission + * control so let userspace change affinities. 
+ */ + if (dl_entity_is_special(&p->dl)) + return 0; + + /* * Since bandwidth control happens on root_domain basis, * if admission test is enabled, we only admit -deadline * tasks allowed to run on all the CPUs in the task's diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index da33ec9e94ab..c49aea8c1025 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -312,7 +312,7 @@ static int sched_energy_aware_handler(const struct ctl_table *table, int write, return ret; } -static struct ctl_table sched_energy_aware_sysctls[] = { +static const struct ctl_table sched_energy_aware_sysctls[] = { { .procname = "sched_energy_aware", .data = &sysctl_sched_energy_aware, diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 385d48293a5f..f59381c4a2ff 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -2450,7 +2450,7 @@ static int seccomp_actions_logged_handler(const struct ctl_table *ro_table, int return ret; } -static struct ctl_table seccomp_sysctl_table[] = { +static const struct ctl_table seccomp_sysctl_table[] = { { .procname = "actions_avail", .data = (void *) &seccomp_actions_avail, diff --git a/kernel/signal.c b/kernel/signal.c index a2afd54303f0..875e97f6205a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4950,7 +4950,7 @@ static inline void siginfo_buildtime_checks(void) } #if defined(CONFIG_SYSCTL) -static struct ctl_table signal_debug_table[] = { +static const struct ctl_table signal_debug_table[] = { #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE { .procname = "exception-trace", diff --git a/kernel/smp.c b/kernel/smp.c index f104c8e83fc4..974f3a3962e8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -170,9 +170,9 @@ static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); static DEFINE_PER_CPU(void *, cur_csd_info); static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */ -module_param(csd_lock_timeout, ulong, 0444); +module_param(csd_lock_timeout, ulong, 0644); static int panic_on_ipistall; /* CSD panic timeout in milliseconds, 300000 for five minutes. */ -module_param(panic_on_ipistall, int, 0444); +module_param(panic_on_ipistall, int, 0644); static atomic_t csd_bug_count = ATOMIC_INIT(0); diff --git a/kernel/stackleak.c b/kernel/stackleak.c index 0f4804f28c61..bb65321761b4 100644 --- a/kernel/stackleak.c +++ b/kernel/stackleak.c @@ -45,7 +45,7 @@ static int stack_erasing_sysctl(const struct ctl_table *table, int write, str_enabled_disabled(state)); return ret; } -static struct ctl_table stackleak_sysctls[] = { +static const struct ctl_table stackleak_sysctls[] = { { .procname = "stack_erasing", .data = NULL, diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index da821ce258ea..8896d844d738 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -250,8 +250,8 @@ static int multi_cpu_stop(void *data) * be detected and reported on their side. 
*/ touch_nmi_watchdog(); + rcu_momentary_eqs(); } - rcu_momentary_eqs(); } while (curstate != MULTI_STOP_EXIT); local_irq_restore(flags); diff --git a/kernel/sysctl-test.c b/kernel/sysctl-test.c index 3ac98bb7fb82..eb2842bd0557 100644 --- a/kernel/sysctl-test.c +++ b/kernel/sysctl-test.c @@ -374,7 +374,7 @@ static void sysctl_test_register_sysctl_sz_invalid_extra_value( struct kunit *test) { unsigned char data = 0; - struct ctl_table table_foo[] = { + const struct ctl_table table_foo[] = { { .procname = "foo", .data = &data, @@ -386,7 +386,7 @@ static void sysctl_test_register_sysctl_sz_invalid_extra_value( }, }; - struct ctl_table table_bar[] = { + const struct ctl_table table_bar[] = { { .procname = "bar", .data = &data, @@ -398,7 +398,7 @@ static void sysctl_test_register_sysctl_sz_invalid_extra_value( }, }; - struct ctl_table table_qux[] = { + const struct ctl_table table_qux[] = { { .procname = "qux", .data = &data, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7ae7a4136855..cb57da499ebb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1609,7 +1609,7 @@ int proc_do_static_key(const struct ctl_table *table, int write, return ret; } -static struct ctl_table kern_table[] = { +static const struct ctl_table kern_table[] = { { .procname = "panic", .data = &panic_timeout, @@ -2021,7 +2021,7 @@ static struct ctl_table kern_table[] = { #endif }; -static struct ctl_table vm_table[] = { +static const struct ctl_table vm_table[] = { { .procname = "overcommit_memory", .data = &sysctl_overcommit_memory, diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 40706cb36920..c8f776dc6ee0 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -301,7 +301,7 @@ static int timer_migration_handler(const struct ctl_table *table, int write, return ret; } -static struct ctl_table timer_sysctl[] = { +static const struct ctl_table timer_sysctl[] = { { .procname = "timer_migration", .data = &sysctl_timer_migration, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f79eb9386c7f..728ecda6e8d4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -8780,7 +8780,7 @@ ftrace_enable_sysctl(const struct ctl_table *table, int write, return 0; } -static struct ctl_table ftrace_sysctls[] = { +static const struct ctl_table ftrace_sysctls[] = { { .procname = "ftrace_enabled", .data = &ftrace_enabled, diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 17bcad8f79de..97325fbd6283 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -2899,7 +2899,7 @@ static int set_max_user_events_sysctl(const struct ctl_table *table, int write, return ret; } -static struct ctl_table user_event_sysctls[] = { +static const struct ctl_table user_event_sysctls[] = { { .procname = "user_events_max", .data = &max_user_events, diff --git a/kernel/umh.c b/kernel/umh.c index be9234270777..b4da45a3a7cf 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -544,7 +544,7 @@ static int proc_cap_handler(const struct ctl_table *table, int write, return 0; } -static struct ctl_table usermodehelper_table[] = { +static const struct ctl_table usermodehelper_table[] = { { .procname = "bset", .data = &usermodehelper_bset, diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 7282f61a8650..bfbaaecb1dd4 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -75,7 +75,7 @@ static DEFINE_CTL_TABLE_POLL(hostname_poll); static DEFINE_CTL_TABLE_POLL(domainname_poll); // Note: update 'enum uts_proc' to match any changes to this 
table -static struct ctl_table uts_kern_table[] = { +static const struct ctl_table uts_kern_table[] = { { .procname = "arch", .data = init_uts_ns.name.machine, @@ -129,7 +129,7 @@ static struct ctl_table uts_kern_table[] = { */ void uts_proc_notify(enum uts_proc proc) { - struct ctl_table *table = &uts_kern_table[proc]; + const struct ctl_table *table = &uts_kern_table[proc]; proc_sys_poll_notify(table->poll); } diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 177abb7d0d4e..b2da7de39d06 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -1094,7 +1094,7 @@ static int proc_watchdog_cpumask(const struct ctl_table *table, int write, static const int sixty = 60; -static struct ctl_table watchdog_sysctls[] = { +static const struct ctl_table watchdog_sysctls[] = { { .procname = "watchdog", .data = &watchdog_user_enabled, diff --git a/lib/Kconfig b/lib/Kconfig index a78d22c6507f..dccb61b7d698 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -164,34 +164,9 @@ config CRC_T10DIF config ARCH_HAS_CRC_T10DIF bool -choice - prompt "CRC-T10DIF implementation" - depends on CRC_T10DIF - default CRC_T10DIF_IMPL_ARCH if ARCH_HAS_CRC_T10DIF - default CRC_T10DIF_IMPL_GENERIC if !ARCH_HAS_CRC_T10DIF - help - This option allows you to override the default choice of CRC-T10DIF - implementation. - -config CRC_T10DIF_IMPL_ARCH - bool "Architecture-optimized" if ARCH_HAS_CRC_T10DIF - help - Use the optimized implementation of CRC-T10DIF for the selected - architecture. It is recommended to keep this enabled, as it can - greatly improve CRC-T10DIF performance. - -config CRC_T10DIF_IMPL_GENERIC - bool "Generic implementation" - help - Use the generic table-based implementation of CRC-T10DIF. Selecting - this will reduce code size slightly but can greatly reduce CRC-T10DIF - performance. - -endchoice - config CRC_T10DIF_ARCH tristate - default CRC_T10DIF if CRC_T10DIF_IMPL_ARCH + default CRC_T10DIF if ARCH_HAS_CRC_T10DIF && CRC_OPTIMIZATIONS config CRC64_ROCKSOFT tristate "CRC calculation for the Rocksoft model CRC64" @@ -223,87 +198,9 @@ config CRC32 config ARCH_HAS_CRC32 bool -choice - prompt "CRC32 implementation" - depends on CRC32 - default CRC32_IMPL_ARCH_PLUS_SLICEBY8 if ARCH_HAS_CRC32 - default CRC32_IMPL_SLICEBY8 if !ARCH_HAS_CRC32 - help - This option allows you to override the default choice of CRC32 - implementation. Choose the default unless you know that you need one - of the others. - -config CRC32_IMPL_ARCH_PLUS_SLICEBY8 - bool "Arch-optimized, with fallback to slice-by-8" if ARCH_HAS_CRC32 - help - Use architecture-optimized implementation of CRC32. Fall back to - slice-by-8 in cases where the arch-optimized implementation cannot be - used, e.g. if the CPU lacks support for the needed instructions. - - This is the default when an arch-optimized implementation exists. - -config CRC32_IMPL_ARCH_PLUS_SLICEBY1 - bool "Arch-optimized, with fallback to slice-by-1" if ARCH_HAS_CRC32 - help - Use architecture-optimized implementation of CRC32, but fall back to - slice-by-1 instead of slice-by-8 in order to reduce the binary size. - -config CRC32_IMPL_SLICEBY8 - bool "Slice by 8 bytes" - help - Calculate checksum 8 bytes at a time with a clever slicing algorithm. - This is much slower than the architecture-optimized implementation of - CRC32 (if the selected arch has one), but it is portable and is the - fastest implementation when no arch-optimized implementation is - available. It uses an 8KiB lookup table. Most modern processors have - enough cache to hold this table without thrashing the cache. 
- -config CRC32_IMPL_SLICEBY4 - bool "Slice by 4 bytes" - help - Calculate checksum 4 bytes at a time with a clever slicing algorithm. - This is a bit slower than slice by 8, but has a smaller 4KiB lookup - table. - - Only choose this option if you know what you are doing. - -config CRC32_IMPL_SLICEBY1 - bool "Slice by 1 byte (Sarwate's algorithm)" - help - Calculate checksum a byte at a time using Sarwate's algorithm. This - is not particularly fast, but has a small 1KiB lookup table. - - Only choose this option if you know what you are doing. - -config CRC32_IMPL_BIT - bool "Classic Algorithm (one bit at a time)" - help - Calculate checksum one bit at a time. This is VERY slow, but has - no lookup table. This is provided as a debugging option. - - Only choose this option if you are debugging crc32. - -endchoice - config CRC32_ARCH tristate - default CRC32 if CRC32_IMPL_ARCH_PLUS_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY1 - -config CRC32_SLICEBY8 - bool - default y if CRC32_IMPL_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY8 - -config CRC32_SLICEBY4 - bool - default y if CRC32_IMPL_SLICEBY4 - -config CRC32_SARWATE - bool - default y if CRC32_IMPL_SLICEBY1 || CRC32_IMPL_ARCH_PLUS_SLICEBY1 - -config CRC32_BIT - bool - default y if CRC32_IMPL_BIT + default CRC32 if ARCH_HAS_CRC32 && CRC_OPTIMIZATIONS config CRC64 tristate "CRC64 functions" @@ -343,6 +240,17 @@ config CRC8 when they need to do cyclic redundancy check according CRC8 algorithm. Module will be called crc8. +config CRC_OPTIMIZATIONS + bool "Enable optimized CRC implementations" if EXPERT + default y + help + Disabling this option reduces code size slightly by disabling the + architecture-optimized implementations of any CRC variants that are + enabled. CRC checksumming performance may get much slower. + + Keep this enabled unless you're really trying to minimize the size of + the kernel. 
+ config XXHASH tristate diff --git a/lib/crc32.c b/lib/crc32.c index 47151624332e..ede6131f66fc 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -30,20 +30,6 @@ #include <linux/crc32poly.h> #include <linux/module.h> #include <linux/types.h> -#include <linux/sched.h> -#include "crc32defs.h" - -#if CRC_LE_BITS > 8 -# define tole(x) ((__force u32) cpu_to_le32(x)) -#else -# define tole(x) (x) -#endif - -#if CRC_BE_BITS > 8 -# define tobe(x) ((__force u32) cpu_to_be32(x)) -#else -# define tobe(x) (x) -#endif #include "crc32table.h" @@ -51,157 +37,20 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); -#if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 - -/* implements slicing-by-4 or slicing-by-8 algorithm */ -static inline u32 __pure -crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) -{ -# ifdef __LITTLE_ENDIAN -# define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8) -# define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \ - t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255]) -# define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \ - t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255]) -# else -# define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) -# define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \ - t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255]) -# define DO_CRC8 (t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \ - t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255]) -# endif - const u32 *b; - size_t rem_len; -# ifdef CONFIG_X86 - size_t i; -# endif - const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3]; -# if CRC_LE_BITS != 32 - const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7]; -# endif - u32 q; - - /* Align it */ - if (unlikely((long)buf & 3 && len)) { - do { - DO_CRC(*buf++); - } while ((--len) && ((long)buf)&3); - } - -# if CRC_LE_BITS == 32 - rem_len = len & 3; - len = len >> 2; -# else - rem_len = len & 7; - len = len >> 3; -# endif - - b = (const u32 *)buf; -# ifdef CONFIG_X86 - --b; - for (i = 0; i < len; i++) { -# else - for (--b; len; --len) { -# endif - q = crc ^ *++b; /* use pre increment for speed */ -# if CRC_LE_BITS == 32 - crc = DO_CRC4; -# else - crc = DO_CRC8; - q = *++b; - crc ^= DO_CRC4; -# endif - } - len = rem_len; - /* And the last few bytes */ - if (len) { - u8 *p = (u8 *)(b + 1) - 1; -# ifdef CONFIG_X86 - for (i = 0; i < len; i++) - DO_CRC(*++p); /* use pre increment for speed */ -# else - do { - DO_CRC(*++p); /* use pre increment for speed */ - } while (--len); -# endif - } - return crc; -#undef DO_CRC -#undef DO_CRC4 -#undef DO_CRC8 -} -#endif - - -/** - * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II - * CRC32/CRC32C - * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for other - * uses, or the previous crc32/crc32c value if computing incrementally. - * @p: pointer to buffer over which CRC32/CRC32C is run - * @len: length of buffer @p - * @tab: little-endian Ethernet table - * @polynomial: CRC32/CRC32c LE polynomial - */ -static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, - size_t len, const u32 (*tab)[256], - u32 polynomial) +u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) { -#if CRC_LE_BITS == 1 - int i; - while (len--) { - crc ^= *p++; - for (i = 0; i < 8; i++) - crc = (crc >> 1) ^ ((crc & 1) ? 
polynomial : 0); - } -# elif CRC_LE_BITS == 2 - while (len--) { - crc ^= *p++; - crc = (crc >> 2) ^ tab[0][crc & 3]; - crc = (crc >> 2) ^ tab[0][crc & 3]; - crc = (crc >> 2) ^ tab[0][crc & 3]; - crc = (crc >> 2) ^ tab[0][crc & 3]; - } -# elif CRC_LE_BITS == 4 - while (len--) { - crc ^= *p++; - crc = (crc >> 4) ^ tab[0][crc & 15]; - crc = (crc >> 4) ^ tab[0][crc & 15]; - } -# elif CRC_LE_BITS == 8 - /* aka Sarwate algorithm */ - while (len--) { - crc ^= *p++; - crc = (crc >> 8) ^ tab[0][crc & 255]; - } -# else - crc = (__force u32) __cpu_to_le32(crc); - crc = crc32_body(crc, p, len, tab); - crc = __le32_to_cpu((__force __le32)crc); -#endif + while (len--) + crc = (crc >> 8) ^ crc32table_le[(crc & 255) ^ *p++]; return crc; } +EXPORT_SYMBOL(crc32_le_base); -#if CRC_LE_BITS == 1 -u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) -{ - return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); -} -u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) -{ - return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); -} -#else -u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) -{ - return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE); -} u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) { - return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE); + while (len--) + crc = (crc >> 8) ^ crc32ctable_le[(crc & 255) ^ *p++]; + return crc; } -#endif -EXPORT_SYMBOL(crc32_le_base); EXPORT_SYMBOL(crc32c_le_base); /* @@ -277,64 +126,10 @@ u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len) EXPORT_SYMBOL(crc32_le_shift); EXPORT_SYMBOL(__crc32c_le_shift); -/** - * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 - * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for - * other uses, or the previous crc32 value if computing incrementally. - * @p: pointer to buffer over which CRC32 is run - * @len: length of buffer @p - * @tab: big-endian Ethernet table - * @polynomial: CRC32 BE polynomial - */ -static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, - size_t len, const u32 (*tab)[256], - u32 polynomial) -{ -#if CRC_BE_BITS == 1 - int i; - while (len--) { - crc ^= *p++ << 24; - for (i = 0; i < 8; i++) - crc = - (crc << 1) ^ ((crc & 0x80000000) ? 
polynomial : - 0); - } -# elif CRC_BE_BITS == 2 - while (len--) { - crc ^= *p++ << 24; - crc = (crc << 2) ^ tab[0][crc >> 30]; - crc = (crc << 2) ^ tab[0][crc >> 30]; - crc = (crc << 2) ^ tab[0][crc >> 30]; - crc = (crc << 2) ^ tab[0][crc >> 30]; - } -# elif CRC_BE_BITS == 4 - while (len--) { - crc ^= *p++ << 24; - crc = (crc << 4) ^ tab[0][crc >> 28]; - crc = (crc << 4) ^ tab[0][crc >> 28]; - } -# elif CRC_BE_BITS == 8 - while (len--) { - crc ^= *p++ << 24; - crc = (crc << 8) ^ tab[0][crc >> 24]; - } -# else - crc = (__force u32) __cpu_to_be32(crc); - crc = crc32_body(crc, p, len, tab); - crc = __be32_to_cpu((__force __be32)crc); -# endif - return crc; -} - -#if CRC_BE_BITS == 1 -u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) -{ - return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); -} -#else u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) { - return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE); + while (len--) + crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++]; + return crc; } -#endif EXPORT_SYMBOL(crc32_be_base); diff --git a/lib/crc32defs.h b/lib/crc32defs.h deleted file mode 100644 index 0c8fb5923e7e..000000000000 --- a/lib/crc32defs.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Try to choose an implementation variant via Kconfig */ -#ifdef CONFIG_CRC32_SLICEBY8 -# define CRC_LE_BITS 64 -# define CRC_BE_BITS 64 -#endif -#ifdef CONFIG_CRC32_SLICEBY4 -# define CRC_LE_BITS 32 -# define CRC_BE_BITS 32 -#endif -#ifdef CONFIG_CRC32_SARWATE -# define CRC_LE_BITS 8 -# define CRC_BE_BITS 8 -#endif -#ifdef CONFIG_CRC32_BIT -# define CRC_LE_BITS 1 -# define CRC_BE_BITS 1 -#endif - -/* - * How many bits at a time to use. Valid values are 1, 2, 4, 8, 32 and 64. - * For less performance-sensitive, use 4 or 8 to save table size. - * For larger systems choose same as CPU architecture as default. - * This works well on X86_64, SPARC64 systems. This may require some - * elaboration after experiments with other architectures. - */ -#ifndef CRC_LE_BITS -# ifdef CONFIG_64BIT -# define CRC_LE_BITS 64 -# else -# define CRC_LE_BITS 32 -# endif -#endif -#ifndef CRC_BE_BITS -# ifdef CONFIG_64BIT -# define CRC_BE_BITS 64 -# else -# define CRC_BE_BITS 32 -# endif -#endif - -/* - * Little-endian CRC computation. Used with serial bit streams sent - * lsbit-first. Be sure to use cpu_to_le32() to append the computed CRC. - */ -#if CRC_LE_BITS > 64 || CRC_LE_BITS < 1 || CRC_LE_BITS == 16 || \ - CRC_LE_BITS & CRC_LE_BITS-1 -# error "CRC_LE_BITS must be one of {1, 2, 4, 8, 32, 64}" -#endif - -/* - * Big-endian CRC computation. Used with serial bit streams sent - * msbit-first. Be sure to use cpu_to_be32() to append the computed CRC. 
- */ -#if CRC_BE_BITS > 64 || CRC_BE_BITS < 1 || CRC_BE_BITS == 16 || \ - CRC_BE_BITS & CRC_BE_BITS-1 -# error "CRC_BE_BITS must be one of {1, 2, 4, 8, 32, 64}" -#endif diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index f755b997b967..6d03425b849e 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -2,30 +2,11 @@ #include <stdio.h> #include "../include/linux/crc32poly.h" #include "../include/generated/autoconf.h" -#include "crc32defs.h" #include <inttypes.h> -#define ENTRIES_PER_LINE 4 - -#if CRC_LE_BITS > 8 -# define LE_TABLE_ROWS (CRC_LE_BITS/8) -# define LE_TABLE_SIZE 256 -#else -# define LE_TABLE_ROWS 1 -# define LE_TABLE_SIZE (1 << CRC_LE_BITS) -#endif - -#if CRC_BE_BITS > 8 -# define BE_TABLE_ROWS (CRC_BE_BITS/8) -# define BE_TABLE_SIZE 256 -#else -# define BE_TABLE_ROWS 1 -# define BE_TABLE_SIZE (1 << CRC_BE_BITS) -#endif - -static uint32_t crc32table_le[LE_TABLE_ROWS][256]; -static uint32_t crc32table_be[BE_TABLE_ROWS][256]; -static uint32_t crc32ctable_le[LE_TABLE_ROWS][256]; +static uint32_t crc32table_le[256]; +static uint32_t crc32table_be[256]; +static uint32_t crc32ctable_le[256]; /** * crc32init_le() - allocate and initialize LE table data @@ -34,25 +15,17 @@ static uint32_t crc32ctable_le[LE_TABLE_ROWS][256]; * fact that crctable[i^j] = crctable[i] ^ crctable[j]. * */ -static void crc32init_le_generic(const uint32_t polynomial, - uint32_t (*tab)[256]) +static void crc32init_le_generic(const uint32_t polynomial, uint32_t tab[256]) { unsigned i, j; uint32_t crc = 1; - tab[0][0] = 0; + tab[0] = 0; - for (i = LE_TABLE_SIZE >> 1; i; i >>= 1) { + for (i = 128; i; i >>= 1) { crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0); - for (j = 0; j < LE_TABLE_SIZE; j += 2 * i) - tab[0][i + j] = crc ^ tab[0][j]; - } - for (i = 0; i < LE_TABLE_SIZE; i++) { - crc = tab[0][i]; - for (j = 1; j < LE_TABLE_ROWS; j++) { - crc = tab[0][crc & 0xff] ^ (crc >> 8); - tab[j][i] = crc; - } + for (j = 0; j < 256; j += 2 * i) + tab[i + j] = crc ^ tab[j]; } } @@ -74,34 +47,22 @@ static void crc32init_be(void) unsigned i, j; uint32_t crc = 0x80000000; - crc32table_be[0][0] = 0; + crc32table_be[0] = 0; - for (i = 1; i < BE_TABLE_SIZE; i <<= 1) { + for (i = 1; i < 256; i <<= 1) { crc = (crc << 1) ^ ((crc & 0x80000000) ? 
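The generator now keeps a single 256-entry row per table and fills it using the linearity of CRC over GF(2), i.e. tab[i ^ j] == tab[i] ^ tab[j]: only the power-of-two indexes are advanced bit by bit, everything else falls out by XOR. A stand-alone restatement of crc32init_le_generic() under that assumption:

#include <stdint.h>

static void crc32_init_table_le(uint32_t polynomial, uint32_t tab[256])
{
        uint32_t crc = 1;
        unsigned int i, j;

        tab[0] = 0;
        for (i = 128; i; i >>= 1) {
                /* crc now holds the entry for the single-bit index i */
                crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0);
                for (j = 0; j < 256; j += 2 * i)
                        tab[i + j] = crc ^ tab[j];   /* tab[i^j] = tab[i]^tab[j] */
        }
}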
CRC32_POLY_BE : 0); for (j = 0; j < i; j++) - crc32table_be[0][i + j] = crc ^ crc32table_be[0][j]; - } - for (i = 0; i < BE_TABLE_SIZE; i++) { - crc = crc32table_be[0][i]; - for (j = 1; j < BE_TABLE_ROWS; j++) { - crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8); - crc32table_be[j][i] = crc; - } + crc32table_be[i + j] = crc ^ crc32table_be[j]; } } -static void output_table(uint32_t (*table)[256], int rows, int len, char *trans) +static void output_table(const uint32_t table[256]) { - int i, j; - - for (j = 0 ; j < rows; j++) { - printf("{"); - for (i = 0; i < len - 1; i++) { - if (i % ENTRIES_PER_LINE == 0) - printf("\n"); - printf("%s(0x%8.8xL), ", trans, table[j][i]); - } - printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]); + int i; + + for (i = 0; i < 256; i += 4) { + printf("\t0x%08x, 0x%08x, 0x%08x, 0x%08x,\n", + table[i], table[i + 1], table[i + 2], table[i + 3]); } } @@ -109,34 +70,20 @@ int main(int argc, char** argv) { printf("/* this file is generated - do not edit */\n\n"); - if (CRC_LE_BITS > 1) { - crc32init_le(); - printf("static const u32 ____cacheline_aligned " - "crc32table_le[%d][%d] = {", - LE_TABLE_ROWS, LE_TABLE_SIZE); - output_table(crc32table_le, LE_TABLE_ROWS, - LE_TABLE_SIZE, "tole"); - printf("};\n"); - } + crc32init_le(); + printf("static const u32 ____cacheline_aligned crc32table_le[256] = {\n"); + output_table(crc32table_le); + printf("};\n"); - if (CRC_BE_BITS > 1) { - crc32init_be(); - printf("static const u32 ____cacheline_aligned " - "crc32table_be[%d][%d] = {", - BE_TABLE_ROWS, BE_TABLE_SIZE); - output_table(crc32table_be, LE_TABLE_ROWS, - BE_TABLE_SIZE, "tobe"); - printf("};\n"); - } - if (CRC_LE_BITS > 1) { - crc32cinit_le(); - printf("static const u32 ____cacheline_aligned " - "crc32ctable_le[%d][%d] = {", - LE_TABLE_ROWS, LE_TABLE_SIZE); - output_table(crc32ctable_le, LE_TABLE_ROWS, - LE_TABLE_SIZE, "tole"); - printf("};\n"); - } + crc32init_be(); + printf("static const u32 ____cacheline_aligned crc32table_be[256] = {\n"); + output_table(crc32table_be); + printf("};\n"); + + crc32cinit_le(); + printf("static const u32 ____cacheline_aligned crc32ctable_le[256] = {\n"); + output_table(crc32ctable_le); + printf("};\n"); return 0; } diff --git a/lib/kobject.c b/lib/kobject.c index 72fa20f405f1..abe5f5b856ce 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -1096,30 +1096,6 @@ void *kobj_ns_grab_current(enum kobj_ns_type type) } EXPORT_SYMBOL_GPL(kobj_ns_grab_current); -const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) -{ - const void *ns = NULL; - - spin_lock(&kobj_ns_type_lock); - if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) - ns = kobj_ns_ops_tbl[type]->netlink_ns(sk); - spin_unlock(&kobj_ns_type_lock); - - return ns; -} - -const void *kobj_ns_initial(enum kobj_ns_type type) -{ - const void *ns = NULL; - - spin_lock(&kobj_ns_type_lock); - if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) - ns = kobj_ns_ops_tbl[type]->initial_ns(); - spin_unlock(&kobj_ns_type_lock); - - return ns; -} - void kobj_ns_drop(enum kobj_ns_type type, void *ns) { spin_lock(&kobj_ns_type_lock); diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index c40818ec9c18..fbe910c9c825 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -47,10 +47,12 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define DO_NOTHING_TYPE_SCALAR(var_type) var_type #define DO_NOTHING_TYPE_STRING(var_type) void #define DO_NOTHING_TYPE_STRUCT(var_type) void +#define DO_NOTHING_TYPE_UNION(var_type) void 
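The UNION variants introduced from this point mirror the existing STRUCT plumbing so every initializer style can also be exercised on unions, where a partial initializer only has to cover the named member and the remaining bytes are compiler-dependent. A minimal illustration of the hazard the new cases probe, using illustrative types rather than the kunit harness:

union mixed {
        char small;
        unsigned long big;
};

void example(void)
{
        union mixed a = { .small = 1 }; /* bytes beyond 'small' may or may not
                                         * be cleared, depending on compiler */
        union mixed b = { };            /* fully zeroed: the "zero" cases */
        union mixed c = { 0 };          /* the "old_zero" spelling also tested */
}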
#define DO_NOTHING_RETURN_SCALAR(ptr) *(ptr) #define DO_NOTHING_RETURN_STRING(ptr) /**/ #define DO_NOTHING_RETURN_STRUCT(ptr) /**/ +#define DO_NOTHING_RETURN_UNION(ptr) /**/ #define DO_NOTHING_CALL_SCALAR(var, name) \ (var) = do_nothing_ ## name(&(var)) @@ -58,10 +60,13 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, do_nothing_ ## name(var) #define DO_NOTHING_CALL_STRUCT(var, name) \ do_nothing_ ## name(&(var)) +#define DO_NOTHING_CALL_UNION(var, name) \ + do_nothing_ ## name(&(var)) #define FETCH_ARG_SCALAR(var) &var #define FETCH_ARG_STRING(var) var #define FETCH_ARG_STRUCT(var) &var +#define FETCH_ARG_UNION(var) &var /* * On m68k, if the leaf function test variable is longer than 8 bytes, @@ -77,6 +82,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define INIT_CLONE_SCALAR /**/ #define INIT_CLONE_STRING [FILL_SIZE_STRING] #define INIT_CLONE_STRUCT /**/ +#define INIT_CLONE_UNION /**/ #define ZERO_CLONE_SCALAR(zero) memset(&(zero), 0x00, sizeof(zero)) #define ZERO_CLONE_STRING(zero) memset(&(zero), 0x00, sizeof(zero)) @@ -92,6 +98,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, zero.three = 0; \ zero.four = 0; \ } while (0) +#define ZERO_CLONE_UNION(zero) ZERO_CLONE_STRUCT(zero) #define INIT_SCALAR_none(var_type) /**/ #define INIT_SCALAR_zero(var_type) = 0 @@ -101,6 +108,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define INIT_STRUCT_none(var_type) /**/ #define INIT_STRUCT_zero(var_type) = { } +#define INIT_STRUCT_old_zero(var_type) = { 0 } #define __static_partial { .two = 0, } @@ -146,6 +154,34 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define INIT_STRUCT_assigned_copy(var_type) \ ; var = *(arg) +/* Union initialization is the same as structs. */ +#define INIT_UNION_none(var_type) INIT_STRUCT_none(var_type) +#define INIT_UNION_zero(var_type) INIT_STRUCT_zero(var_type) +#define INIT_UNION_old_zero(var_type) INIT_STRUCT_old_zero(var_type) + +#define INIT_UNION_static_partial(var_type) \ + INIT_STRUCT_static_partial(var_type) +#define INIT_UNION_static_all(var_type) \ + INIT_STRUCT_static_all(var_type) +#define INIT_UNION_dynamic_partial(var_type) \ + INIT_STRUCT_dynamic_partial(var_type) +#define INIT_UNION_dynamic_all(var_type) \ + INIT_STRUCT_dynamic_all(var_type) +#define INIT_UNION_runtime_partial(var_type) \ + INIT_STRUCT_runtime_partial(var_type) +#define INIT_UNION_runtime_all(var_type) \ + INIT_STRUCT_runtime_all(var_type) +#define INIT_UNION_assigned_static_partial(var_type) \ + INIT_STRUCT_assigned_static_partial(var_type) +#define INIT_UNION_assigned_static_all(var_type) \ + INIT_STRUCT_assigned_static_all(var_type) +#define INIT_UNION_assigned_dynamic_partial(var_type) \ + INIT_STRUCT_assigned_dynamic_partial(var_type) +#define INIT_UNION_assigned_dynamic_all(var_type) \ + INIT_STRUCT_assigned_dynamic_all(var_type) +#define INIT_UNION_assigned_copy(var_type) \ + INIT_STRUCT_assigned_copy(var_type) + /* * @name: unique string name for the test * @var_type: type to be tested for zeroing initialization @@ -294,6 +330,33 @@ struct test_user { unsigned long four; }; +/* No padding: all members are the same size. 
*/ +union test_same_sizes { + unsigned long one; + unsigned long two; + unsigned long three; + unsigned long four; +}; + +/* Mismatched sizes, with one and two being small */ +union test_small_start { + char one:1; + char two; + short three; + unsigned long four; + struct big_struct { + unsigned long array[8]; + } big; +}; + +/* Mismatched sizes, with one and two being small */ +union test_small_end { + short one; + unsigned long two; + char three:1; + char four; +}; + #define ALWAYS_PASS WANT_SUCCESS #define ALWAYS_FAIL XFAIL @@ -332,6 +395,11 @@ struct test_user { struct test_ ## name, STRUCT, init, \ xfail) +#define DEFINE_UNION_TEST(name, init, xfail) \ + DEFINE_TEST(name ## _ ## init, \ + union test_ ## name, STRUCT, init, \ + xfail) + #define DEFINE_STRUCT_TESTS(init, xfail) \ DEFINE_STRUCT_TEST(small_hole, init, xfail); \ DEFINE_STRUCT_TEST(big_hole, init, xfail); \ @@ -343,9 +411,22 @@ struct test_user { xfail); \ DEFINE_STRUCT_TESTS(base ## _ ## all, xfail) +#define DEFINE_UNION_INITIALIZER_TESTS(base, xfail) \ + DEFINE_UNION_TESTS(base ## _ ## partial, \ + xfail); \ + DEFINE_UNION_TESTS(base ## _ ## all, xfail) + +#define DEFINE_UNION_TESTS(init, xfail) \ + DEFINE_UNION_TEST(same_sizes, init, xfail); \ + DEFINE_UNION_TEST(small_start, init, xfail); \ + DEFINE_UNION_TEST(small_end, init, xfail); + /* These should be fully initialized all the time! */ DEFINE_SCALAR_TESTS(zero, ALWAYS_PASS); DEFINE_STRUCT_TESTS(zero, ALWAYS_PASS); +DEFINE_STRUCT_TESTS(old_zero, ALWAYS_PASS); +DEFINE_UNION_TESTS(zero, ALWAYS_PASS); +DEFINE_UNION_TESTS(old_zero, ALWAYS_PASS); /* Struct initializers: padding may be left uninitialized. */ DEFINE_STRUCT_INITIALIZER_TESTS(static, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(dynamic, STRONG_PASS); @@ -353,6 +434,12 @@ DEFINE_STRUCT_INITIALIZER_TESTS(runtime, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(assigned_static, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS); DEFINE_STRUCT_TESTS(assigned_copy, ALWAYS_FAIL); +DEFINE_UNION_INITIALIZER_TESTS(static, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(dynamic, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(runtime, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(assigned_static, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS); +DEFINE_UNION_TESTS(assigned_copy, ALWAYS_FAIL); /* No initialization without compiler instrumentation. */ DEFINE_SCALAR_TESTS(none, STRONG_PASS); DEFINE_STRUCT_TESTS(none, BYREF_PASS); @@ -436,13 +523,23 @@ DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, ALWAYS_FAIL); KUNIT_CASE(test_trailing_hole_ ## init),\ KUNIT_CASE(test_packed_ ## init) \ +#define KUNIT_test_unions(init) \ + KUNIT_CASE(test_same_sizes_ ## init), \ + KUNIT_CASE(test_small_start_ ## init), \ + KUNIT_CASE(test_small_end_ ## init) \ + static struct kunit_case stackinit_test_cases[] = { /* These are explicitly initialized and should always pass. */ KUNIT_test_scalars(zero), KUNIT_test_structs(zero), + KUNIT_test_structs(old_zero), + KUNIT_test_unions(zero), + KUNIT_test_unions(old_zero), /* Padding here appears to be accidentally always initialized? */ KUNIT_test_structs(dynamic_partial), KUNIT_test_structs(assigned_dynamic_partial), + KUNIT_test_unions(dynamic_partial), + KUNIT_test_unions(assigned_dynamic_partial), /* Padding initialization depends on compiler behaviors. 
*/ KUNIT_test_structs(static_partial), KUNIT_test_structs(static_all), @@ -452,8 +549,17 @@ static struct kunit_case stackinit_test_cases[] = { KUNIT_test_structs(assigned_static_partial), KUNIT_test_structs(assigned_static_all), KUNIT_test_structs(assigned_dynamic_all), + KUNIT_test_unions(static_partial), + KUNIT_test_unions(static_all), + KUNIT_test_unions(dynamic_all), + KUNIT_test_unions(runtime_partial), + KUNIT_test_unions(runtime_all), + KUNIT_test_unions(assigned_static_partial), + KUNIT_test_unions(assigned_static_all), + KUNIT_test_unions(assigned_dynamic_all), /* Everything fails this since it effectively performs a memcpy(). */ KUNIT_test_structs(assigned_copy), + KUNIT_test_unions(assigned_copy), /* STRUCTLEAK_BYREF_ALL should cover everything from here down. */ KUNIT_test_scalars(none), KUNIT_CASE(test_switch_1_none), diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c index b6696fa1d426..4249e0cc8aaf 100644 --- a/lib/test_sysctl.c +++ b/lib/test_sysctl.c @@ -71,7 +71,7 @@ static struct test_sysctl_data test_data = { }; /* These are all under /proc/sys/debug/test_sysctl/ */ -static struct ctl_table test_table[] = { +static const struct ctl_table test_table[] = { { .procname = "int_0001", .data = &test_data.int_0001, @@ -177,7 +177,7 @@ static int test_sysctl_setup_node_tests(void) } /* Used to test that unregister actually removes the directory */ -static struct ctl_table test_table_unregister[] = { +static const struct ctl_table test_table_unregister[] = { { .procname = "unregister_error", .data = &test_data.int_0001, @@ -220,7 +220,7 @@ static int test_sysctl_run_register_mount_point(void) return 0; } -static struct ctl_table test_table_empty[] = { }; +static const struct ctl_table test_table_empty[] = { }; static int test_sysctl_run_register_empty(void) { diff --git a/mm/compaction.c b/mm/compaction.c index 73e80b2fb22e..bcc0df0066dc 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -3272,7 +3272,7 @@ static int proc_dointvec_minmax_warn_RT_change(const struct ctl_table *table, return ret; } -static struct ctl_table vm_compaction[] = { +static const struct ctl_table vm_compaction[] = { { .procname = "compact_memory", .data = &sysctl_compact_memory, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 87761b042ed0..3b25b69aa94f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4867,7 +4867,7 @@ out: return ret; } -static struct ctl_table hugetlb_table[] = { +static const struct ctl_table hugetlb_table[] = { { .procname = "nr_hugepages", .data = NULL, diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 57b7f591eee8..7735972add01 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -693,7 +693,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l free_vmemmap_page_list(&vmemmap_pages); } -static struct ctl_table hugetlb_vmemmap_sysctls[] = { +static const struct ctl_table hugetlb_vmemmap_sysctls[] = { { .procname = "hugetlb_optimize_vmemmap", .data = &vmemmap_optimize_enabled, diff --git a/mm/memory-failure.c b/mm/memory-failure.c index a7b8ccd29b6f..995a15eb67e2 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -124,7 +124,7 @@ const struct attribute_group memory_failure_attr_group = { .attrs = memory_failure_attr, }; -static struct ctl_table memory_failure_table[] = { +static const struct ctl_table memory_failure_table[] = { { .procname = "memory_failure_early_kill", .data = &sysctl_memory_failure_early_kill, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 044ebab2c941..1cf121ad7085 100644 --- a/mm/oom_kill.c 
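The test_sysctl and mm hunks in this run are all the same mechanical change: sysctl tables that are never modified at runtime become const, which the registration path now accepts. A minimal sketch of the resulting pattern, with illustrative names and assuming the constified register_sysctl() this series relies on:

#include <linux/sysctl.h>
#include <linux/init.h>

static int example_value;

static const struct ctl_table example_table[] = {
        {
                .procname       = "example_value",
                .data           = &example_value,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
};

static int __init example_sysctl_init(void)
{
        /* table contents are immutable, so the array can live in rodata */
        if (!register_sysctl("debug/example", example_table))
                return -ENOMEM;
        return 0;
}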
+++ b/mm/oom_kill.c @@ -705,7 +705,7 @@ static void queue_oom_reaper(struct task_struct *tsk) } #ifdef CONFIG_SYSCTL -static struct ctl_table vm_oom_kill_table[] = { +static const struct ctl_table vm_oom_kill_table[] = { { .procname = "panic_on_oom", .data = &sysctl_panic_on_oom, diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4f5970723cf2..eb55ece39c56 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2298,7 +2298,7 @@ static int page_writeback_cpu_online(unsigned int cpu) /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE; -static struct ctl_table vm_page_writeback_sysctls[] = { +static const struct ctl_table vm_page_writeback_sysctls[] = { { .procname = "dirty_background_ratio", .data = &dirty_background_ratio, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6e469c7ef9a4..579789600a3c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6166,7 +6166,7 @@ out: return ret; } -static struct ctl_table page_alloc_sysctl_table[] = { +static const struct ctl_table page_alloc_sysctl_table[] = { { .procname = "min_free_kbytes", .data = &min_free_kbytes, diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c index 4a85b94d12ce..794bd433cce0 100644 --- a/mm/shrinker_debug.c +++ b/mm/shrinker_debug.c @@ -195,8 +195,6 @@ int shrinker_debugfs_add(struct shrinker *shrinker) int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) { - struct dentry *entry; - char buf[128]; const char *new, *old; va_list ap; int ret = 0; @@ -213,18 +211,8 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) old = shrinker->name; shrinker->name = new; - if (shrinker->debugfs_entry) { - snprintf(buf, sizeof(buf), "%s-%d", shrinker->name, - shrinker->debugfs_id); - - entry = debugfs_rename(shrinker_debugfs_root, - shrinker->debugfs_entry, - shrinker_debugfs_root, buf); - if (IS_ERR(entry)) - ret = PTR_ERR(entry); - else - shrinker->debugfs_entry = entry; - } + ret = debugfs_change_name(shrinker->debugfs_entry, "%s-%d", + shrinker->name, shrinker->debugfs_id); mutex_unlock(&shrinker_mutex); diff --git a/mm/slub.c b/mm/slub.c index 996691c137eb..1f50129dcfb3 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -7509,10 +7509,7 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep) return -ENOMEM; } - if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0) - alloc = TRACK_ALLOC; - else - alloc = TRACK_FREE; + alloc = debugfs_get_aux_num(filep); if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) { bitmap_free(obj_map); @@ -7568,11 +7565,11 @@ static void debugfs_slab_add(struct kmem_cache *s) slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root); - debugfs_create_file("alloc_traces", 0400, - slab_cache_dir, s, &slab_debugfs_fops); + debugfs_create_file_aux_num("alloc_traces", 0400, slab_cache_dir, s, + TRACK_ALLOC, &slab_debugfs_fops); - debugfs_create_file("free_traces", 0400, - slab_cache_dir, s, &slab_debugfs_fops); + debugfs_create_file_aux_num("free_traces", 0400, slab_cache_dir, s, + TRACK_FREE, &slab_debugfs_fops); } void debugfs_slab_release(struct kmem_cache *s) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 49f97d4138ea..46ea0bee2259 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -710,12 +710,12 @@ static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu) { switch (chan->scid) { case L2CAP_CID_ATT: - if (mtu < L2CAP_LE_MIN_MTU) + if (mtu && mtu < 
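The shrinker and SLUB hunks above switch to two newer debugfs helpers: debugfs_change_name() renames an entry in place (replacing open-coded debugfs_rename()), and debugfs_create_file_aux_num()/debugfs_get_aux_num() attach a small integer to a file at creation time instead of recovering it from the dentry name in ->open(). A rough sketch of both, with illustrative names and the signatures as used in the hunks:

#include <linux/debugfs.h>
#include <linux/printk.h>

static struct dentry *example_dir;

static int example_open(struct inode *inode, struct file *filp)
{
        /* integer handed over at creation time, e.g. TRACK_ALLOC vs TRACK_FREE */
        int which = debugfs_get_aux_num(filp);

        filp->private_data = (void *)(long)which;
        return 0;
}

static const struct file_operations example_fops = {
        .open = example_open,
};

static void example_setup(void)
{
        example_dir = debugfs_create_dir("example", NULL);
        debugfs_create_file_aux_num("traces", 0400, example_dir, NULL,
                                    1 /* aux value */, &example_fops);
}

static void example_rename(const char *new_name)
{
        if (debugfs_change_name(example_dir, "%s", new_name))
                pr_warn("example: debugfs rename failed\n");
}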
L2CAP_LE_MIN_MTU) return false; break; default: - if (mtu < L2CAP_DEFAULT_MIN_MTU) + if (mtu && mtu < L2CAP_DEFAULT_MIN_MTU) return false; } diff --git a/net/core/dev.c b/net/core/dev.c index afa2282f2604..c0021cbd28fc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6708,7 +6708,7 @@ void napi_resume_irqs(unsigned int napi_id) static void __napi_hash_add_with_id(struct napi_struct *napi, unsigned int napi_id) { - napi->napi_id = napi_id; + WRITE_ONCE(napi->napi_id, napi_id); hlist_add_head_rcu(&napi->napi_hash_node, &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); } @@ -9924,6 +9924,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Program bound to different device"); return -EINVAL; } + if (bpf_prog_is_dev_bound(new_prog->aux) && mode == XDP_MODE_SKB) { + NL_SET_ERR_MSG(extack, "Can't attach device-bound programs in generic mode"); + return -EINVAL; + } if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); return -EINVAL; @@ -10260,37 +10264,14 @@ static bool from_cleanup_net(void) #endif } -static void rtnl_drop_if_cleanup_net(void) -{ - if (from_cleanup_net()) - __rtnl_unlock(); -} - -static void rtnl_acquire_if_cleanup_net(void) -{ - if (from_cleanup_net()) - rtnl_lock(); -} - /* Delayed registration/unregisteration */ LIST_HEAD(net_todo_list); -static LIST_HEAD(net_todo_list_for_cleanup_net); - -/* TODO: net_todo_list/net_todo_list_for_cleanup_net should probably - * be provided by callers, instead of being static, rtnl protected. - */ -static struct list_head *todo_list(void) -{ - return from_cleanup_net() ? &net_todo_list_for_cleanup_net : - &net_todo_list; -} - DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); atomic_t dev_unreg_count = ATOMIC_INIT(0); static void net_set_todo(struct net_device *dev) { - list_add_tail(&dev->todo_list, todo_list()); + list_add_tail(&dev->todo_list, &net_todo_list); } static netdev_features_t netdev_sync_upper_features(struct net_device *lower, @@ -11140,7 +11121,7 @@ void netdev_run_todo(void) #endif /* Snapshot list, allow later requests */ - list_replace_init(todo_list(), &list); + list_replace_init(&net_todo_list, &list); __rtnl_unlock(); @@ -11785,11 +11766,9 @@ void unregister_netdevice_many_notify(struct list_head *head, WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING); netdev_unlock(dev); } - - rtnl_drop_if_cleanup_net(); flush_all_backlogs(); + synchronize_net(); - rtnl_acquire_if_cleanup_net(); list_for_each_entry(dev, head, unreg_list) { struct sk_buff *skb = NULL; @@ -11849,9 +11828,7 @@ void unregister_netdevice_many_notify(struct list_head *head, #endif } - rtnl_drop_if_cleanup_net(); synchronize_net(); - rtnl_acquire_if_cleanup_net(); list_for_each_entry(dev, head, unreg_list) { netdev_put(dev, &dev->dev_registered_tracker); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 1906c62dee85..f5e908c9e7ad 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -1146,7 +1146,9 @@ void page_pool_disable_direct_recycling(struct page_pool *pool) WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state)); WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1); + mutex_lock(&page_pools_lock); WRITE_ONCE(pool->p.napi, NULL); + mutex_unlock(&page_pools_lock); } EXPORT_SYMBOL(page_pool_disable_direct_recycling); diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h index 57439787b9c2..2fb06d5f6d55 100644 --- a/net/core/page_pool_priv.h +++ b/net/core/page_pool_priv.h @@ 
-7,6 +7,8 @@ #include "netmem_priv.h" +extern struct mutex page_pools_lock; + s32 page_pool_inflight(const struct page_pool *pool, bool strict); int page_pool_list(struct page_pool *pool); diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 48335766c1bf..6677e0c2e256 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -3,6 +3,7 @@ #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/xarray.h> +#include <net/busy_poll.h> #include <net/net_debug.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> @@ -14,10 +15,11 @@ #include "netdev-genl-gen.h" static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1); -/* Protects: page_pools, netdevice->page_pools, pool->slow.netdev, pool->user. +/* Protects: page_pools, netdevice->page_pools, pool->p.napi, pool->slow.netdev, + * pool->user. * Ordering: inside rtnl_lock */ -static DEFINE_MUTEX(page_pools_lock); +DEFINE_MUTEX(page_pools_lock); /* Page pools are only reachable from user space (via netlink) if they are * linked to a netdev at creation time. Following page pool "visibility" @@ -216,6 +218,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, { struct net_devmem_dmabuf_binding *binding = pool->mp_priv; size_t inflight, refsz; + unsigned int napi_id; void *hdr; hdr = genlmsg_iput(rsp, info); @@ -229,8 +232,10 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX, pool->slow.netdev->ifindex)) goto err_cancel; - if (pool->user.napi_id && - nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id)) + + napi_id = pool->p.napi ? READ_ONCE(pool->p.napi->napi_id) : 0; + if (napi_id >= MIN_NAPI_ID && + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, napi_id)) goto err_cancel; inflight = page_pool_inflight(pool, false); @@ -319,8 +324,6 @@ int page_pool_list(struct page_pool *pool) if (pool->slow.netdev) { hlist_add_head(&pool->user.list, &pool->slow.netdev->page_pools); - pool->user.napi_id = pool->p.napi ? 
pool->p.napi->napi_id : 0; - netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF); } diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 5a7c0e565a89..e827775baf2e 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1367,7 +1367,7 @@ static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn) return dsa_switch_parse_ports_of(ds, dn); } -static int dev_is_class(struct device *dev, void *class) +static int dev_is_class(struct device *dev, const void *class) { if (dev->class != NULL && !strcmp(dev->class->name, class)) return 1; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 7bb94875a7ec..34bee42e1247 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -998,7 +998,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; - if (ops->get_rxfh) { + if (cmd == ETHTOOL_SRXFH && ops->get_rxfh) { struct ethtool_rxfh_param rxfh = {}; rc = ops->get_rxfh(dev, &rxfh); diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index 1a195efc79cd..5b2cfac3b2ba 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -57,14 +57,11 @@ DEFINE_SHOW_ATTRIBUTE(hsr_node_table); void hsr_debugfs_rename(struct net_device *dev) { struct hsr_priv *priv = netdev_priv(dev); - struct dentry *d; + int err; - d = debugfs_rename(hsr_debugfs_root_dir, priv->node_tbl_root, - hsr_debugfs_root_dir, dev->name); - if (IS_ERR(d)) + err = debugfs_change_name(priv->node_tbl_root, "%s", dev->name); + if (err) netdev_warn(dev, "failed to rename\n"); - else - priv->node_tbl_root = d; } /* hsr_debugfs_init - create hsr node_table file for dumping diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 87bb3a91598e..a4bacf198555 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -700,9 +700,12 @@ static int fill_frame_info(struct hsr_frame_info *frame, frame->is_vlan = true; if (frame->is_vlan) { - if (skb->mac_len < offsetofend(struct hsr_vlan_ethhdr, vlanhdr)) + /* Note: skb->mac_len might be wrong here. */ + if (!pskb_may_pull(skb, + skb_mac_offset(skb) + + offsetofend(struct hsr_vlan_ethhdr, vlanhdr))) return -EINVAL; - vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr; + vlan_hdr = (struct hsr_vlan_ethhdr *)skb_mac_header(skb); proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b0fbf804bbba..0e4076866c0a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -279,7 +279,7 @@ static void esp_output_done(void *data, int err) x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb->sk, skb, err); + xfrm_output_resume(skb_to_full_sk(skb), skb, err); } } diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 03b6eee407a2..28d77d454d44 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -330,9 +330,6 @@ next_entry: list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { if (e < s_e) goto next_entry2; - if (filter->dev && - !mr_mfc_uses_dev(mrt, mfc, filter->dev)) - goto next_entry2; err = fill(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0e5b9a654254..bc95d2a5924f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -265,11 +265,14 @@ static u16 tcp_select_window(struct sock *sk) u32 cur_win, new_win; /* Make the window 0 if we failed to queue the data because we - * are out of memory. 
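The hsr_forward fix above is the usual header-access rule: skb->mac_len cannot be trusted at that point, so the needed bytes are pulled explicitly and the header pointer is re-derived afterwards, since pskb_may_pull() may reallocate skb->head. A sketch of the idiom with an illustrative function name:

#include <linux/if_vlan.h>
#include <linux/skbuff.h>

static int example_get_vlan_proto(struct sk_buff *skb, __be16 *proto)
{
        const struct vlan_ethhdr *vhdr;

        if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*vhdr)))
                return -EINVAL;

        /* re-read the header pointer only after the pull succeeded */
        vhdr = (const struct vlan_ethhdr *)skb_mac_header(skb);
        *proto = vhdr->h_vlan_encapsulated_proto;
        return 0;
}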
The window is temporary, so we don't store - * it on the socket. + * are out of memory. */ - if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM)) + if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM)) { + tp->pred_flags = 0; + tp->rcv_wnd = 0; + tp->rcv_wup = tp->rcv_nxt; return 0; + } cur_win = tcp_receive_window(tp); new_win = __tcp_select_window(sk); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 5f3d0cc1555a..9e73944e3b53 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -315,7 +315,7 @@ static void esp_output_done(void *data, int err) x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb->sk, skb, err); + xfrm_output_resume(skb_to_full_sk(skb), skb, err); } } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 5f7b1fdbffe6..b3d5d1f266ee 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -82,14 +82,14 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) toobig = skb->len > mtu && !skb_is_gso(skb); - if (toobig && xfrm6_local_dontfrag(skb->sk)) { + if (toobig && xfrm6_local_dontfrag(sk)) { xfrm6_local_rxpmtu(skb, mtu); kfree_skb(skb); return -EMSGSIZE; } else if (toobig && xfrm6_noneed_fragment(skb)) { skb->ignore_df = 1; goto skip_frag; - } else if (!skb->ignore_df && toobig && skb->sk) { + } else if (!skb->ignore_df && toobig && sk) { xfrm_local_error(skb, mtu); kfree_skb(skb); return -EMSGSIZE; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index e7687a7b1683..54c479910d05 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -1025,16 +1025,7 @@ void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) { - struct dentry *dir; - char buf[10 + IFNAMSIZ]; - - dir = sdata->vif.debugfs_dir; - - if (IS_ERR_OR_NULL(dir)) - return; - - sprintf(buf, "netdev:%s", sdata->name); - debugfs_rename(dir->d_parent, dir, dir->d_parent, buf); + debugfs_change_name(sdata->vif.debugfs_dir, "netdev:%s", sdata->name); } void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata, diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 3999e0ba2c35..2dd81e6c26bd 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -418,9 +418,9 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP); subflow->mpc_drop = 1; mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); - } else { - subflow->mpc_drop = 0; } + } else if (ssk->sk_state == TCP_SYN_SENT) { + subflow->mpc_drop = 0; } } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 123f3f297284..fd2de185bc93 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -108,7 +108,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_DSS; mp_opt->use_map = 1; mp_opt->mpc_map = 1; - mp_opt->use_ack = 0; mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } @@ -157,11 +156,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, pr_debug("DSS\n"); ptr++; - /* we must clear 'mpc_map' be able to detect MP_CAPABLE - * map vs DSS map in mptcp_incoming_options(), and reconstruct - * map info accordingly - */ - mp_opt->mpc_map = 0; flags = (*ptr++) & MPTCP_DSS_FLAG_MASK; mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0; mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0; @@ -369,8 +363,11 @@ void mptcp_get_options(const struct 
sk_buff *skb, const unsigned char *ptr; int length; - /* initialize option status */ - mp_opt->suboptions = 0; + /* Ensure that casting the whole status to u32 is efficient and safe */ + BUILD_BUG_ON(sizeof_field(struct mptcp_options_received, status) != sizeof(u32)); + BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_options_received, status), + sizeof(u32))); + *(u32 *)&mp_opt->status = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 98ac73938bd8..572d160edca3 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -2020,7 +2020,8 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && - (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | + MPTCP_PM_ADDR_FLAG_IMPLICIT))) { spin_unlock_bh(&pernet->lock); GENL_SET_ERR_MSG(info, "invalid addr flags"); return -EINVAL; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c44c89ecaca6..6bd819047470 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1767,8 +1767,10 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, * see mptcp_disconnect(). * Attempt it again outside the problematic scope. */ - if (!mptcp_disconnect(sk, 0)) + if (!mptcp_disconnect(sk, 0)) { + sk->sk_disconnects++; sk->sk_socket->state = SS_UNCONNECTED; + } } inet_clear_bit(DEFER_CONNECT, sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0174a5aad279..f6a207958459 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -149,22 +149,24 @@ struct mptcp_options_received { u32 subflow_seq; u16 data_len; __sum16 csum; - u16 suboptions; + struct_group(status, + u16 suboptions; + u16 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + mpc_map:1, + reset_reason:4, + reset_transient:1, + echo:1, + backup:1, + deny_join_id0:1, + __unused:2; + ); + u8 join_id; u32 token; u32 nonce; - u16 use_map:1, - dsn64:1, - data_fin:1, - use_ack:1, - ack64:1, - mpc_map:1, - reset_reason:4, - reset_transient:1, - echo:1, - backup:1, - deny_join_id0:1, - __unused:2; - u8 join_id; u64 thmac; u8 hmac[MPTCPOPT_HMAC_LEN]; struct mptcp_addr_info addr; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index bf276eaf9330..7891a537bddd 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1385,6 +1385,12 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_package; break; case ncsi_dev_state_probe_package: + if (ndp->package_probe_id >= 8) { + /* Last package probed, finishing */ + ndp->flags |= NCSI_DEV_PROBED; + break; + } + ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_SP; @@ -1501,13 +1507,8 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) if (ret) goto error; - /* Probe next package */ + /* Probe next package after receiving response */ ndp->package_probe_id++; - if (ndp->package_probe_id >= 8) { - /* Probe finished */ - ndp->flags |= NCSI_DEV_PROBED; - break; - } nd->state = ncsi_dev_state_probe_package; ndp->active_package = NULL; break; diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 14bd66909ca4..4a8ce2949fae 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -1089,14 +1089,12 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr) static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr) { struct ncsi_dev_priv *ndp = nr->ndp; + struct sockaddr *saddr = 
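The protocol.h hunk wraps all of the option-parse status fields in struct_group() so mptcp_get_options() can clear them with a single aligned 32-bit store (guarded by the BUILD_BUG_ONs above) instead of resetting members one by one. A reduced sketch of the same trick with illustrative field names:

#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/align.h>
#include <linux/build_bug.h>

struct parsed_opts {
        u32 token;
        struct_group(status,
                u16 suboptions;
                u16 use_map:1,
                    dsn64:1,
                    mpc_map:1,
                    __unused:13;
        );
        u8 join_id;
};

static inline void parsed_opts_reset(struct parsed_opts *po)
{
        BUILD_BUG_ON(sizeof_field(struct parsed_opts, status) != sizeof(u32));
        BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct parsed_opts, status), sizeof(u32)));
        *(u32 *)&po->status = 0;        /* one store clears the whole group */
}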
&ndp->pending_mac; struct net_device *ndev = ndp->ndev.dev; struct ncsi_rsp_gmcma_pkt *rsp; - struct sockaddr saddr; - int ret = -1; int i; rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp); - saddr.sa_family = ndev->type; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n", @@ -1108,20 +1106,20 @@ static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr) rsp->addresses[i][4], rsp->addresses[i][5]); } + saddr->sa_family = ndev->type; for (i = 0; i < rsp->address_count; i++) { - memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN); - ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr); - if (ret < 0) { + if (!is_valid_ether_addr(rsp->addresses[i])) { netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n", - saddr.sa_data); + rsp->addresses[i]); continue; } - netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data); + memcpy(saddr->sa_data, rsp->addresses[i], ETH_ALEN); + netdev_warn(ndev, "NCSI: Will set MAC address to %pM\n", saddr->sa_data); break; } - ndp->gma_flag = ret == 0; - return ret; + ndp->gma_flag = 1; + return 0; } static struct ncsi_rsp_handler { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 667459256e4c..a34de9c17cf1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5078,7 +5078,7 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, static int nft_set_desc_concat(struct nft_set_desc *desc, const struct nlattr *nla) { - u32 num_regs = 0, key_num_regs = 0; + u32 len = 0, num_regs; struct nlattr *attr; int rem, err, i; @@ -5092,12 +5092,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, } for (i = 0; i < desc->field_count; i++) - num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32)); + len += round_up(desc->field_len[i], sizeof(u32)); - key_num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32)); - if (key_num_regs != num_regs) + if (len != desc->klen) return -EINVAL; + num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32)); if (num_regs > NFT_REG32_COUNT) return -E2BIG; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index de175318a3a0..082ab66f120b 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -542,6 +542,8 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, pr_debug("pipe created=%d\n", pipe); + if (pipe >= NCI_HCI_MAX_PIPES) + pipe = NCI_HCI_INVALID_PIPE; return pipe; } diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index f06ddbed3fed..1525773e94aa 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -122,6 +122,10 @@ static void rose_heartbeat_expiry(struct timer_list *t) struct rose_sock *rose = rose_sk(sk); bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ/20); + goto out; + } switch (rose->state) { case ROSE_STATE_0: /* Magic here: If we listen() and a new link dies before it @@ -152,6 +156,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) } rose_start_heartbeat(sk); +out: bh_unlock_sock(sk); sock_put(sk); } @@ -162,6 +167,10 @@ static void rose_timer_expiry(struct timer_list *t) struct sock *sk = &rose->sock; bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &rose->timer, jiffies + HZ/20); + goto out; + } switch (rose->state) { case ROSE_STATE_1: /* T1 */ case ROSE_STATE_4: /* T2 */ @@ -182,6 +191,7 @@ static void rose_timer_expiry(struct timer_list *t) } break; } +out: bh_unlock_sock(sk); sock_put(sk); } @@ -192,6 +202,10 @@ static void 
rose_idletimer_expiry(struct timer_list *t) struct sock *sk = &rose->sock; bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &rose->idletimer, jiffies + HZ/20); + goto out; + } rose_clear_queues(sk); rose_write_internal(sk, ROSE_CLEAR_REQUEST); @@ -207,6 +221,7 @@ static void rose_idletimer_expiry(struct timer_list *t) sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); } +out: bh_unlock_sock(sk); sock_put(sk); } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index d82e44a3901b..e874c31fa901 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -246,7 +246,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, bool use; int slot; - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); while (!list_empty(collector)) { peer = list_entry(collector->next, @@ -257,7 +257,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, continue; use = __rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); if (use) { keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; @@ -277,17 +277,17 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, */ slot += cursor; slot &= mask; - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive[slot & mask]); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive); } rxrpc_put_peer(peer, rxrpc_peer_put_keepalive); - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); } - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); } /* @@ -317,7 +317,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) * second; the bucket at cursor + 1 goes at now + 1s and so * on... 
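All three ROSE timer handlers gain the same guard: a timer runs in BH context and must not touch socket state while a process owns the socket lock, so it backs off and re-arms itself. The shape of the pattern, with an illustrative handler name:

#include <linux/timer.h>
#include <net/sock.h>

static void example_timer_expiry(struct timer_list *t)
{
        struct sock *sk = from_timer(sk, t, sk_timer);

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk)) {
                /* owner is inside lock_sock(); retry in ~50 ms */
                sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
                goto out;
        }

        /* ... normal expiry work, now safely serialized ... */
out:
        bh_unlock_sock(sk);
        sock_put(sk);
}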
*/ - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); list_splice_init(&rxnet->peer_keepalive_new, &collector); stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive); @@ -329,7 +329,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) } base = now; - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); rxnet->peer_keepalive_base = base; rxnet->peer_keepalive_cursor = cursor; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index e1c63129586b..0fcc87f0409f 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -325,10 +325,10 @@ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) hash_key = rxrpc_peer_hash_key(local, &peer->srx); rxrpc_init_peer(local, peer, hash_key); - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); } /* @@ -360,7 +360,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, return NULL; } - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); @@ -373,7 +373,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, &rxnet->peer_keepalive_new); } - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); if (peer) rxrpc_free_peer(candidate); @@ -423,10 +423,10 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) ASSERT(hlist_empty(&peer->error_targets)); - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); rxrpc_free_peer(peer); } diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index f80bc05d4c5a..516038a44163 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -91,6 +91,8 @@ ets_class_from_arg(struct Qdisc *sch, unsigned long arg) { struct ets_sched *q = qdisc_priv(sch); + if (arg == 0 || arg > q->nbands) + return NULL; return &q->classes[arg - 1]; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0090162ee8c3..2fe88ea79a70 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -958,12 +958,17 @@ void rpc_shutdown_client(struct rpc_clnt *clnt) trace_rpc_clnt_shutdown(clnt); + clnt->cl_shutdown = 1; while (!list_empty(&clnt->cl_tasks)) { rpc_killall_tasks(clnt); wait_event_timeout(destroy_wait, list_empty(&clnt->cl_tasks), 1*HZ); } + /* wait for tasks still in workqueue or waitqueue */ + wait_event_timeout(destroy_wait, + atomic_read(&clnt->cl_task_count) == 0, 1 * HZ); + rpc_release_client(clnt); } EXPORT_SYMBOL_GPL(rpc_shutdown_client); @@ -1139,6 +1144,7 @@ void rpc_task_release_client(struct rpc_task *task) list_del(&task->tk_task); spin_unlock(&clnt->cl_lock); task->tk_client = NULL; + atomic_dec(&clnt->cl_task_count); rpc_release_client(clnt); } @@ -1189,10 +1195,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_TIMEOUT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - /* Add to the client's list of all tasks */ - spin_lock(&clnt->cl_lock); - list_add_tail(&task->tk_task, &clnt->cl_tasks); - spin_unlock(&clnt->cl_lock); + atomic_inc(&clnt->cl_task_count); } 
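The sch_ets change above is a straightforward bounds check: class handles are 1-based indexes into q->classes[], so anything outside 1..nbands is rejected before it can index the array. The fixed helper, reproduced with a clarifying comment:

static struct ets_class *ets_class_from_arg(struct Qdisc *sch, unsigned long arg)
{
        struct ets_sched *q = qdisc_priv(sch);

        /* arg is a 1-based band number; 0 and anything past nbands is invalid */
        if (arg == 0 || arg > q->nbands)
                return NULL;
        return &q->classes[arg - 1];
}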
static void @@ -1787,9 +1790,14 @@ call_reserveresult(struct rpc_task *task) if (status >= 0) { if (task->tk_rqstp) { task->tk_action = call_refresh; + + /* Add to the client's list of all tasks */ + spin_lock(&task->tk_client->cl_lock); + if (list_empty(&task->tk_task)) + list_add_tail(&task->tk_task, &task->tk_client->cl_tasks); + spin_unlock(&task->tk_client->cl_lock); return; } - rpc_call_rpcerror(task, -EIO); return; } @@ -1854,13 +1862,13 @@ call_refreshresult(struct rpc_task *task) fallthrough; case -EAGAIN: status = -EACCES; - fallthrough; - case -EKEYEXPIRED: if (!task->tk_cred_retry) break; task->tk_cred_retry--; trace_rpc_retry_refresh_status(task); return; + case -EKEYEXPIRED: + break; case -ENOMEM: rpc_delay(task, HZ >> 4); return; @@ -3319,8 +3327,11 @@ bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_has_addr); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -static void rpc_show_header(void) +static void rpc_show_header(struct rpc_clnt *clnt) { + printk(KERN_INFO "clnt[%pISpc] RPC tasks[%d]\n", + (struct sockaddr *)&clnt->cl_xprt->addr, + atomic_read(&clnt->cl_task_count)); printk(KERN_INFO "-pid- flgs status -client- --rqstp- " "-timeout ---ops--\n"); } @@ -3352,7 +3363,7 @@ void rpc_show_tasks(struct net *net) spin_lock(&clnt->cl_lock); list_for_each_entry(task, &clnt->cl_tasks, tk_task) { if (!header) { - rpc_show_header(); + rpc_show_header(clnt); header++; } rpc_show_task(clnt, task); diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index a176d5a0b0ee..32417db340de 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -74,6 +74,9 @@ tasks_stop(struct seq_file *f, void *v) { struct rpc_clnt *clnt = f->private; spin_unlock(&clnt->cl_lock); + seq_printf(f, "clnt[%pISpc] RPC tasks[%d]\n", + (struct sockaddr *)&clnt->cl_xprt->addr, + atomic_read(&clnt->cl_task_count)); } static const struct seq_operations tasks_seq_operations = { @@ -179,6 +182,18 @@ xprt_info_show(struct seq_file *f, void *v) seq_printf(f, "addr: %s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); seq_printf(f, "port: %s\n", xprt->address_strings[RPC_DISPLAY_PORT]); seq_printf(f, "state: 0x%lx\n", xprt->state); + seq_printf(f, "netns: %u\n", xprt->xprt_net->ns.inum); + + if (xprt->ops->get_srcaddr) { + int ret, buflen; + char buf[INET6_ADDRSTRLEN]; + + buflen = ARRAY_SIZE(buf); + ret = xprt->ops->get_srcaddr(xprt, buf, buflen); + if (ret < 0) + ret = sprintf(buf, "<closed>"); + seq_printf(f, "saddr: %.*s\n", ret, buf); + } return 0; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index fa9d1b49599b..075695173648 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -337,7 +337,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket); void vsock_remove_sock(struct vsock_sock *vsk) { - vsock_remove_bound(vsk); + /* Transport reassignment must not remove the binding. 
*/ + if (sock_flag(sk_vsock(vsk), SOCK_DEAD)) + vsock_remove_bound(vsk); + vsock_remove_connected(vsk); } EXPORT_SYMBOL_GPL(vsock_remove_sock); @@ -821,12 +824,13 @@ static void __vsock_release(struct sock *sk, int level) */ lock_sock_nested(sk, level); + sock_orphan(sk); + if (vsk->transport) vsk->transport->release(vsk); else if (sock_type_connectible(sk->sk_type)) vsock_remove_sock(vsk); - sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; skb_queue_purge(&sk->sk_receive_queue); @@ -1519,6 +1523,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, if (err < 0) goto out; + /* sk_err might have been set as a result of an earlier + * (failed) connect attempt. + */ + sk->sk_err = 0; + /* Mark sock as connecting and set the error code to in * progress in case this is a non-blocking connect. */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 70857018f020..12b780de8779 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -143,10 +143,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, if (result) return result; - if (!IS_ERR_OR_NULL(rdev->wiphy.debugfsdir)) - debugfs_rename(rdev->wiphy.debugfsdir->d_parent, - rdev->wiphy.debugfsdir, - rdev->wiphy.debugfsdir->d_parent, newname); + debugfs_change_name(rdev->wiphy.debugfsdir, "%s", newname); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 98f1e2b67c76..c397eb99d867 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -506,7 +506,7 @@ xmit: skb_dst_set(skb, dst); skb->dev = tdev; - err = dst_output(xi->net, skb->sk, skb); + err = dst_output(xi->net, skb_to_full_sk(skb), skb); if (net_xmit_eval(err) == 0) { dev_sw_netstats_tx_add(dev, 1, length); } else { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index b5025cf6136e..f7abd42c077d 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -802,7 +802,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { skb->protocol = htons(ETH_P_IP); - if (skb->sk) + if (skb->sk && sk_fullsock(skb->sk)) xfrm_local_error(skb, mtu); else icmp_send(skb, ICMP_DEST_UNREACH, @@ -838,6 +838,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; struct dst_entry *dst = skb_dst(skb); + struct sock *sk = skb_to_full_sk(skb); if (skb->ignore_df) goto out; @@ -852,9 +853,9 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) skb->dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); - if (xfrm6_local_dontfrag(skb->sk)) + if (xfrm6_local_dontfrag(sk)) ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); - else if (skb->sk) + else if (sk) xfrm_local_error(skb, mtu); else icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9e510021ee91..6551e588fe52 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2964,7 +2964,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) skb_dst_drop(skb); skb_dst_set(skb, dst); - dst_output(net, skb->sk, skb); + dst_output(net, skb_to_full_sk(skb), skb); } out: diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index e500aebbad22..dbdf8a39dffe 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -714,10 +714,12 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff oseq += skb_shinfo(skb)->gso_segs; } - if (unlikely(xo->seq.low < replay_esn->oseq)) { - 
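Several of the esp/xfrm hunks replace direct skb->sk uses with skb_to_full_sk(): on the output path skb->sk can point at a request or timewait socket, so the code converts it to the owning full socket (or NULL) before any socket fields are used. A sketch of the idiom in an illustrative wrapper; the constants match the IPv4 branch shown earlier:

#include <net/icmp.h>
#include <net/xfrm.h>

static void example_report_too_big(struct sk_buff *skb, u32 mtu)
{
        struct sock *sk = skb_to_full_sk(skb);  /* NULL or a full socket */

        if (sk)
                xfrm_local_error(skb, mtu);     /* needs a full socket */
        else
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                          htonl(mtu));
}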
XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; - xo->seq.hi = oseq_hi; - replay_esn->oseq_hi = oseq_hi; + if (unlikely(oseq < replay_esn->oseq)) { + replay_esn->oseq_hi = ++oseq_hi; + if (xo->seq.low < replay_esn->oseq) { + XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; + xo->seq.hi = oseq_hi; + } if (replay_esn->oseq_hi == 0) { replay_esn->oseq--; replay_esn->oseq_hi--; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 34067cb8a479..ad2202fa82f3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -34,6 +34,8 @@ #define xfrm_state_deref_prot(table, net) \ rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) +#define xfrm_state_deref_check(table, net) \ + rcu_dereference_check((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) static void xfrm_state_gc_task(struct work_struct *work); @@ -62,6 +64,8 @@ static inline unsigned int xfrm_dst_hash(struct net *net, u32 reqid, unsigned short family) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask); } @@ -70,6 +74,8 @@ static inline unsigned int xfrm_src_hash(struct net *net, const xfrm_address_t *saddr, unsigned short family) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); } @@ -77,11 +83,15 @@ static inline unsigned int xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); } static unsigned int xfrm_seq_hash(struct net *net, u32 seq) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_seq_hash(seq, net->xfrm.state_hmask); } @@ -1108,16 +1118,38 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, x->props.family = tmpl->encap_family; } -static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark, +struct xfrm_hash_state_ptrs { + const struct hlist_head *bydst; + const struct hlist_head *bysrc; + const struct hlist_head *byspi; + unsigned int hmask; +}; + +static void xfrm_hash_ptrs_get(const struct net *net, struct xfrm_hash_state_ptrs *ptrs) +{ + unsigned int sequence; + + do { + sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); + + ptrs->bydst = xfrm_state_deref_check(net->xfrm.state_bydst, net); + ptrs->bysrc = xfrm_state_deref_check(net->xfrm.state_bysrc, net); + ptrs->byspi = xfrm_state_deref_check(net->xfrm.state_byspi, net); + ptrs->hmask = net->xfrm.state_hmask; + } while (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)); +} + +static struct xfrm_state *__xfrm_state_lookup_all(const struct xfrm_hash_state_ptrs *state_ptrs, + u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, struct xfrm_dev_offload *xdo) { - unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); + unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { + hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) { #ifdef CONFIG_XFRM_OFFLOAD if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) { if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) @@ -1151,15 +1183,16 @@ static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark, return NULL; } -static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, +static struct xfrm_state 
*__xfrm_state_lookup(const struct xfrm_hash_state_ptrs *state_ptrs, + u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { - unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); + unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { + hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || @@ -1181,11 +1214,11 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, __be32 spi, u8 proto, unsigned short family) { + struct xfrm_hash_state_ptrs state_ptrs; struct hlist_head *state_cache_input; struct xfrm_state *x = NULL; - int cpu = get_cpu(); - state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu); + state_cache_input = raw_cpu_ptr(net->xfrm.state_cache_input); rcu_read_lock(); hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) { @@ -1202,7 +1235,9 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, goto out; } - x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); + xfrm_hash_ptrs_get(net, &state_ptrs); + + x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family); if (x && x->km.state == XFRM_STATE_VALID) { spin_lock_bh(&net->xfrm.xfrm_state_lock); @@ -1217,20 +1252,20 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, out: rcu_read_unlock(); - put_cpu(); return x; } EXPORT_SYMBOL(xfrm_input_state_lookup); -static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, +static struct xfrm_state *__xfrm_state_lookup_byaddr(const struct xfrm_hash_state_ptrs *state_ptrs, + u32 mark, const xfrm_address_t *daddr, const xfrm_address_t *saddr, u8 proto, unsigned short family) { - unsigned int h = xfrm_src_hash(net, daddr, saddr, family); + unsigned int h = __xfrm_src_hash(daddr, saddr, family, state_ptrs->hmask); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) { + hlist_for_each_entry_rcu(x, state_ptrs->bysrc + h, bysrc) { if (x->props.family != family || x->id.proto != proto || !xfrm_addr_equal(&x->id.daddr, daddr, family) || @@ -1250,14 +1285,17 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) { + struct xfrm_hash_state_ptrs state_ptrs; struct net *net = xs_net(x); u32 mark = x->mark.v & x->mark.m; + xfrm_hash_ptrs_get(net, &state_ptrs); + if (use_spi) - return __xfrm_state_lookup(net, mark, &x->id.daddr, + return __xfrm_state_lookup(&state_ptrs, mark, &x->id.daddr, x->id.spi, x->id.proto, family); else - return __xfrm_state_lookup_byaddr(net, mark, + return __xfrm_state_lookup_byaddr(&state_ptrs, mark, &x->id.daddr, &x->props.saddr, x->id.proto, family); @@ -1331,6 +1369,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family, u32 if_id) { static xfrm_address_t saddr_wildcard = { }; + struct xfrm_hash_state_ptrs state_ptrs; struct net *net = xp_net(pol); unsigned int h, h_wildcard; struct xfrm_state *x, *x0, *to_put; @@ -1395,8 +1434,10 @@ cached: else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */ WARN_ON(1); - h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); - hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { + xfrm_hash_ptrs_get(net, &state_ptrs); + + h = 
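The new xfrm_hash_ptrs_get() snapshots the three hash-table pointers and the mask under the hash-generation seqcount, so a lookup can never pair a table from one resize with the mask from another. The generic read-side shape of that pattern, reduced to illustrative names:

#include <linux/seqlock.h>

struct cfg_snapshot {
        void *table;
        unsigned int mask;
};

static seqcount_t cfg_seq;
static struct cfg_snapshot cfg;

static struct cfg_snapshot cfg_read(void)
{
        struct cfg_snapshot snap;
        unsigned int seq;

        do {
                seq = read_seqcount_begin(&cfg_seq);
                snap = cfg;     /* copy every field that must stay consistent */
        } while (read_seqcount_retry(&cfg_seq, seq));

        return snap;
}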
__xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask); + hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) { #ifdef CONFIG_XFRM_OFFLOAD if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) @@ -1429,8 +1470,9 @@ cached: if (best || acquire_in_progress) goto found; - h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); - hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) { + h_wildcard = __xfrm_dst_hash(daddr, &saddr_wildcard, tmpl->reqid, + encap_family, state_ptrs.hmask); + hlist_for_each_entry_rcu(x, state_ptrs.bydst + h_wildcard, bydst) { #ifdef CONFIG_XFRM_OFFLOAD if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) @@ -1468,7 +1510,7 @@ found: if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup_all(net, mark, daddr, + (x0 = __xfrm_state_lookup_all(&state_ptrs, mark, daddr, tmpl->id.spi, tmpl->id.proto, encap_family, &pol->xdo)) != NULL) { @@ -2253,10 +2295,13 @@ struct xfrm_state * xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { + struct xfrm_hash_state_ptrs state_ptrs; struct xfrm_state *x; rcu_read_lock(); - x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); + xfrm_hash_ptrs_get(net, &state_ptrs); + + x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family); rcu_read_unlock(); return x; } @@ -2267,10 +2312,14 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark, const xfrm_address_t *daddr, const xfrm_address_t *saddr, u8 proto, unsigned short family) { + struct xfrm_hash_state_ptrs state_ptrs; struct xfrm_state *x; spin_lock_bh(&net->xfrm.xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); + + xfrm_hash_ptrs_get(net, &state_ptrs); + + x = __xfrm_state_lookup_byaddr(&state_ptrs, mark, daddr, saddr, proto, family); spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } diff --git a/rust/Makefile b/rust/Makefile index 71a05a3c895a..8fcfd60447bc 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -331,10 +331,11 @@ $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ; $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers/helpers.c FORCE $(call if_changed_dep,bindgen) +rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ { printf $(2),$$3 }' + quiet_cmd_exports = EXPORTS $@ cmd_exports = \ - $(NM) -p --defined-only $< \ - | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ {printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3}' > $@ + $(call rust_exports,$<,"EXPORT_SYMBOL_RUST_GPL(%s);\n") > $@ $(obj)/exports_core_generated.h: $(obj)/core.o FORCE $(call if_changed,exports) @@ -403,11 +404,36 @@ ifneq ($(or $(CONFIG_ARM64),$(and $(CONFIG_RISCV),$(CONFIG_64BIT))),) __ashlti3 __lshrti3 endif +ifdef CONFIG_MODVERSIONS +cmd_gendwarfksyms = $(if $(skip_gendwarfksyms),, \ + $(call rust_exports,$@,"%s\n") | \ + scripts/gendwarfksyms/gendwarfksyms \ + $(if $(KBUILD_GENDWARFKSYMS_STABLE), --stable) \ + $(if $(KBUILD_SYMTYPES), --symtypes $(@:.o=.symtypes),) \ + $@ >> $(dot-target).cmd) +endif + define rule_rustc_library $(call cmd_and_fixdep,rustc_library) $(call cmd,gen_objtooldep) + $(call cmd,gendwarfksyms) endef +define rule_rust_cc_library + $(call if_changed_rule,cc_o_c) + $(call cmd,force_checksrc) + $(call cmd,gendwarfksyms) +endef + +# helpers.o uses the same export mechanism as Rust libraries, so ensure symbol +# 
versions are calculated for the helpers too. +$(obj)/helpers/helpers.o: $(src)/helpers/helpers.c $(recordmcount_source) FORCE + +$(call if_changed_rule,rust_cc_library) + +# Disable symbol versioning for exports.o to avoid conflicts with the actual +# symbol versions generated from Rust objects. +$(obj)/exports.o: private skip_gendwarfksyms = 1 + $(obj)/core.o: private skip_clippy = 1 $(obj)/core.o: private skip_flags = -Wunreachable_pub $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym)) @@ -419,13 +445,16 @@ ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),) $(obj)/core.o: scripts/target.json endif +$(obj)/compiler_builtins.o: private skip_gendwarfksyms = 1 $(obj)/compiler_builtins.o: private rustc_objcopy = -w -W '__*' $(obj)/compiler_builtins.o: $(src)/compiler_builtins.rs $(obj)/core.o FORCE +$(call if_changed_rule,rustc_library) +$(obj)/build_error.o: private skip_gendwarfksyms = 1 $(obj)/build_error.o: $(src)/build_error.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_rule,rustc_library) +$(obj)/ffi.o: private skip_gendwarfksyms = 1 $(obj)/ffi.o: $(src)/ffi.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_rule,rustc_library) @@ -437,6 +466,7 @@ $(obj)/bindings.o: $(src)/bindings/lib.rs \ +$(call if_changed_rule,rustc_library) $(obj)/uapi.o: private rustc_target_flags = --extern ffi +$(obj)/uapi.o: private skip_gendwarfksyms = 1 $(obj)/uapi.o: $(src)/uapi/lib.rs \ $(obj)/ffi.o \ $(obj)/uapi/uapi_generated.rs FORCE diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 5c4dfe22f41a..55354e4dec14 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -20,9 +20,13 @@ #include <linux/jump_label.h> #include <linux/mdio.h> #include <linux/miscdevice.h> +#include <linux/of_device.h> +#include <linux/pci.h> #include <linux/phy.h> #include <linux/pid_namespace.h> +#include <linux/platform_device.h> #include <linux/poll.h> +#include <linux/property.h> #include <linux/refcount.h> #include <linux/sched.h> #include <linux/security.h> diff --git a/rust/helpers/device.c b/rust/helpers/device.c new file mode 100644 index 000000000000..b2135c6686b0 --- /dev/null +++ b/rust/helpers/device.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/device.h> + +int rust_helper_devm_add_action(struct device *dev, + void (*action)(void *), + void *data) +{ + return devm_add_action(dev, action, data); +} diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index dcf827a61b52..0640b7e115be 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -12,14 +12,19 @@ #include "build_assert.c" #include "build_bug.c" #include "cred.c" +#include "device.c" #include "err.c" #include "fs.c" +#include "io.c" #include "jump_label.c" #include "kunit.c" #include "mutex.c" #include "page.c" +#include "platform.c" +#include "pci.c" #include "pid_namespace.c" #include "rbtree.c" +#include "rcu.c" #include "refcount.c" #include "security.c" #include "signal.c" diff --git a/rust/helpers/io.c b/rust/helpers/io.c new file mode 100644 index 000000000000..4c2401ccd720 --- /dev/null +++ b/rust/helpers/io.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/io.h> + +void __iomem *rust_helper_ioremap(phys_addr_t offset, size_t size) +{ + return ioremap(offset, size); +} + +void rust_helper_iounmap(volatile void __iomem *addr) +{ + iounmap(addr); +} + +u8 rust_helper_readb(const volatile void __iomem *addr) +{ + return readb(addr); +} + +u16 
rust_helper_readw(const volatile void __iomem *addr) +{ + return readw(addr); +} + +u32 rust_helper_readl(const volatile void __iomem *addr) +{ + return readl(addr); +} + +#ifdef CONFIG_64BIT +u64 rust_helper_readq(const volatile void __iomem *addr) +{ + return readq(addr); +} +#endif + +void rust_helper_writeb(u8 value, volatile void __iomem *addr) +{ + writeb(value, addr); +} + +void rust_helper_writew(u16 value, volatile void __iomem *addr) +{ + writew(value, addr); +} + +void rust_helper_writel(u32 value, volatile void __iomem *addr) +{ + writel(value, addr); +} + +#ifdef CONFIG_64BIT +void rust_helper_writeq(u64 value, volatile void __iomem *addr) +{ + writeq(value, addr); +} +#endif + +u8 rust_helper_readb_relaxed(const volatile void __iomem *addr) +{ + return readb_relaxed(addr); +} + +u16 rust_helper_readw_relaxed(const volatile void __iomem *addr) +{ + return readw_relaxed(addr); +} + +u32 rust_helper_readl_relaxed(const volatile void __iomem *addr) +{ + return readl_relaxed(addr); +} + +#ifdef CONFIG_64BIT +u64 rust_helper_readq_relaxed(const volatile void __iomem *addr) +{ + return readq_relaxed(addr); +} +#endif + +void rust_helper_writeb_relaxed(u8 value, volatile void __iomem *addr) +{ + writeb_relaxed(value, addr); +} + +void rust_helper_writew_relaxed(u16 value, volatile void __iomem *addr) +{ + writew_relaxed(value, addr); +} + +void rust_helper_writel_relaxed(u32 value, volatile void __iomem *addr) +{ + writel_relaxed(value, addr); +} + +#ifdef CONFIG_64BIT +void rust_helper_writeq_relaxed(u64 value, volatile void __iomem *addr) +{ + writeq_relaxed(value, addr); +} +#endif diff --git a/rust/helpers/pci.c b/rust/helpers/pci.c new file mode 100644 index 000000000000..8ba22f911459 --- /dev/null +++ b/rust/helpers/pci.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/pci.h> + +void rust_helper_pci_set_drvdata(struct pci_dev *pdev, void *data) +{ + pci_set_drvdata(pdev, data); +} + +void *rust_helper_pci_get_drvdata(struct pci_dev *pdev) +{ + return pci_get_drvdata(pdev); +} + +resource_size_t rust_helper_pci_resource_len(struct pci_dev *pdev, int bar) +{ + return pci_resource_len(pdev, bar); +} diff --git a/rust/helpers/platform.c b/rust/helpers/platform.c new file mode 100644 index 000000000000..ab9b9f317301 --- /dev/null +++ b/rust/helpers/platform.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/platform_device.h> + +void *rust_helper_platform_get_drvdata(const struct platform_device *pdev) +{ + return platform_get_drvdata(pdev); +} + +void rust_helper_platform_set_drvdata(struct platform_device *pdev, void *data) +{ + platform_set_drvdata(pdev, data); +} diff --git a/rust/helpers/rcu.c b/rust/helpers/rcu.c new file mode 100644 index 000000000000..f1cec6583513 --- /dev/null +++ b/rust/helpers/rcu.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/rcupdate.h> + +void rust_helper_rcu_read_lock(void) +{ + rcu_read_lock(); +} + +void rust_helper_rcu_read_unlock(void) +{ + rcu_read_unlock(); +} diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index d5e6a19ff6b7..db2d9658ba47 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -6,6 +6,7 @@ use crate::{ bindings, + str::CStr, types::{ARef, Opaque}, }; use core::{fmt, ptr}; @@ -180,6 +181,12 @@ impl Device { ) }; } + + /// Checks if property is present or not. + pub fn property_present(&self, name: &CStr) -> bool { + // SAFETY: By the invariant of `CStr`, `name` is null-terminated. 
+ unsafe { bindings::device_property_present(self.as_raw().cast_const(), name.as_char_ptr()) } + } } // SAFETY: Instances of `Device` are always reference-counted. diff --git a/rust/kernel/device_id.rs b/rust/kernel/device_id.rs new file mode 100644 index 000000000000..e5859217a579 --- /dev/null +++ b/rust/kernel/device_id.rs @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Generic implementation of device IDs. +//! +//! Each bus / subsystem that matches device and driver through a bus / subsystem specific ID is +//! expected to implement [`RawDeviceId`]. + +use core::mem::MaybeUninit; + +/// Marker trait to indicate a Rust device ID type represents a corresponding C device ID type. +/// +/// This is meant to be implemented by buses/subsystems so that they can use [`IdTable`] to +/// guarantee (at compile-time) zero-termination of device id tables provided by drivers. +/// +/// # Safety +/// +/// Implementers must ensure that: +/// - `Self` is layout-compatible with [`RawDeviceId::RawType`]; i.e. it's safe to transmute to +/// `RawDeviceId`. +/// +/// This requirement is needed so `IdArray::new` can convert `Self` to `RawType` when building +/// the ID table. +/// +/// Ideally, this should be achieved using a const function that does conversion instead of +/// transmute; however, const trait functions relies on `const_trait_impl` unstable feature, +/// which is broken/gone in Rust 1.73. +/// +/// - `DRIVER_DATA_OFFSET` is the offset of context/data field of the device ID (usually named +/// `driver_data`) of the device ID, the field is suitable sized to write a `usize` value. +/// +/// Similar to the previous requirement, the data should ideally be added during `Self` to +/// `RawType` conversion, but there's currently no way to do it when using traits in const. +pub unsafe trait RawDeviceId { + /// The raw type that holds the device id. + /// + /// Id tables created from [`Self`] are going to hold this type in its zero-terminated array. + type RawType: Copy; + + /// The offset to the context/data field. + const DRIVER_DATA_OFFSET: usize; + + /// The index stored at `DRIVER_DATA_OFFSET` of the implementor of the [`RawDeviceId`] trait. + fn index(&self) -> usize; +} + +/// A zero-terminated device id array. +#[repr(C)] +pub struct RawIdArray<T: RawDeviceId, const N: usize> { + ids: [T::RawType; N], + sentinel: MaybeUninit<T::RawType>, +} + +impl<T: RawDeviceId, const N: usize> RawIdArray<T, N> { + #[doc(hidden)] + pub const fn size(&self) -> usize { + core::mem::size_of::<Self>() + } +} + +/// A zero-terminated device id array, followed by context data. +#[repr(C)] +pub struct IdArray<T: RawDeviceId, U, const N: usize> { + raw_ids: RawIdArray<T, N>, + id_infos: [U; N], +} + +impl<T: RawDeviceId, U, const N: usize> IdArray<T, U, N> { + /// Creates a new instance of the array. + /// + /// The contents are derived from the given identifiers and context information. + pub const fn new(ids: [(T, U); N]) -> Self { + let mut raw_ids = [const { MaybeUninit::<T::RawType>::uninit() }; N]; + let mut infos = [const { MaybeUninit::uninit() }; N]; + + let mut i = 0usize; + while i < N { + // SAFETY: by the safety requirement of `RawDeviceId`, we're guaranteed that `T` is + // layout-wise compatible with `RawType`. + raw_ids[i] = unsafe { core::mem::transmute_copy(&ids[i].0) }; + // SAFETY: by the safety requirement of `RawDeviceId`, this would be effectively + // `raw_ids[i].driver_data = i;`. 
+ unsafe { + raw_ids[i] + .as_mut_ptr() + .byte_offset(T::DRIVER_DATA_OFFSET as _) + .cast::<usize>() + .write(i); + } + + // SAFETY: this is effectively a move: `infos[i] = ids[i].1`. We make a copy here but + // later forget `ids`. + infos[i] = MaybeUninit::new(unsafe { core::ptr::read(&ids[i].1) }); + i += 1; + } + + core::mem::forget(ids); + + Self { + raw_ids: RawIdArray { + // SAFETY: this is effectively `array_assume_init`, which is unstable, so we use + // `transmute_copy` instead. We have initialized all elements of `raw_ids` so this + // `array_assume_init` is safe. + ids: unsafe { core::mem::transmute_copy(&raw_ids) }, + sentinel: MaybeUninit::zeroed(), + }, + // SAFETY: We have initialized all elements of `infos` so this `array_assume_init` is + // safe. + id_infos: unsafe { core::mem::transmute_copy(&infos) }, + } + } + + /// Reference to the contained [`RawIdArray`]. + pub const fn raw_ids(&self) -> &RawIdArray<T, N> { + &self.raw_ids + } +} + +/// A device id table. +/// +/// This trait is only implemented by `IdArray`. +/// +/// The purpose of this trait is to allow `&'static dyn IdArray<T, U>` to be in context when `N` in +/// `IdArray` doesn't matter. +pub trait IdTable<T: RawDeviceId, U> { + /// Obtain the pointer to the ID table. + fn as_ptr(&self) -> *const T::RawType; + + /// Obtain the pointer to the bus specific device ID from an index. + fn id(&self, index: usize) -> &T::RawType; + + /// Obtain the pointer to the driver-specific information from an index. + fn info(&self, index: usize) -> &U; +} + +impl<T: RawDeviceId, U, const N: usize> IdTable<T, U> for IdArray<T, U, N> { + fn as_ptr(&self) -> *const T::RawType { + // This cannot be `self.ids.as_ptr()`, as the return pointer must have correct provenance + // to access the sentinel. + (self as *const Self).cast() + } + + fn id(&self, index: usize) -> &T::RawType { + &self.raw_ids.ids[index] + } + + fn info(&self, index: usize) -> &U { + &self.id_infos[index] + } +} + +/// Create device table alias for modpost. +#[macro_export] +macro_rules! module_device_table { + ($table_type: literal, $module_table_name:ident, $table_name:ident) => { + #[rustfmt::skip] + #[export_name = + concat!("__mod_device_table__", $table_type, + "__", module_path!(), + "_", line!(), + "_", stringify!($table_name)) + ] + static $module_table_name: [core::mem::MaybeUninit<u8>; $table_name.raw_ids().size()] = + unsafe { core::mem::transmute_copy($table_name.raw_ids()) }; + }; +} diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs new file mode 100644 index 000000000000..942376f6f3af --- /dev/null +++ b/rust/kernel/devres.rs @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Devres abstraction +//! +//! [`Devres`] represents an abstraction for the kernel devres (device resource management) +//! implementation. + +use crate::{ + alloc::Flags, + bindings, + device::Device, + error::{Error, Result}, + ffi::c_void, + prelude::*, + revocable::Revocable, + sync::Arc, + types::ARef, +}; + +use core::ops::Deref; + +#[pin_data] +struct DevresInner<T> { + dev: ARef<Device>, + callback: unsafe extern "C" fn(*mut c_void), + #[pin] + data: Revocable<T>, +} + +/// This abstraction is meant to be used by subsystems to containerize [`Device`] bound resources to +/// manage their lifetime. +/// +/// [`Device`] bound resources should be freed when either the resource goes out of scope or the +/// [`Device`] is unbound respectively, depending on what happens first. 
+/// +/// To achieve that [`Devres`] registers a devres callback on creation, which is called once the +/// [`Device`] is unbound, revoking access to the encapsulated resource (see also [`Revocable`]). +/// +/// After the [`Devres`] has been unbound it is not possible to access the encapsulated resource +/// anymore. +/// +/// [`Devres`] users should make sure to simply free the corresponding backing resource in `T`'s +/// [`Drop`] implementation. +/// +/// # Example +/// +/// ```no_run +/// # use kernel::{bindings, c_str, device::Device, devres::Devres, io::{Io, IoRaw}}; +/// # use core::ops::Deref; +/// +/// // See also [`pci::Bar`] for a real example. +/// struct IoMem<const SIZE: usize>(IoRaw<SIZE>); +/// +/// impl<const SIZE: usize> IoMem<SIZE> { +/// /// # Safety +/// /// +/// /// [`paddr`, `paddr` + `SIZE`) must be a valid MMIO region that is mappable into the CPUs +/// /// virtual address space. +/// unsafe fn new(paddr: usize) -> Result<Self>{ +/// // SAFETY: By the safety requirements of this function [`paddr`, `paddr` + `SIZE`) is +/// // valid for `ioremap`. +/// let addr = unsafe { bindings::ioremap(paddr as _, SIZE as _) }; +/// if addr.is_null() { +/// return Err(ENOMEM); +/// } +/// +/// Ok(IoMem(IoRaw::new(addr as _, SIZE)?)) +/// } +/// } +/// +/// impl<const SIZE: usize> Drop for IoMem<SIZE> { +/// fn drop(&mut self) { +/// // SAFETY: `self.0.addr()` is guaranteed to be properly mapped by `Self::new`. +/// unsafe { bindings::iounmap(self.0.addr() as _); }; +/// } +/// } +/// +/// impl<const SIZE: usize> Deref for IoMem<SIZE> { +/// type Target = Io<SIZE>; +/// +/// fn deref(&self) -> &Self::Target { +/// // SAFETY: The memory range stored in `self` has been properly mapped in `Self::new`. +/// unsafe { Io::from_raw(&self.0) } +/// } +/// } +/// # fn no_run() -> Result<(), Error> { +/// # // SAFETY: Invalid usage; just for the example to get an `ARef<Device>` instance. +/// # let dev = unsafe { Device::get_device(core::ptr::null_mut()) }; +/// +/// // SAFETY: Invalid usage for example purposes. +/// let iomem = unsafe { IoMem::<{ core::mem::size_of::<u32>() }>::new(0xBAAAAAAD)? }; +/// let devres = Devres::new(&dev, iomem, GFP_KERNEL)?; +/// +/// let res = devres.try_access().ok_or(ENXIO)?; +/// res.writel(0x42, 0x0); +/// # Ok(()) +/// # } +/// ``` +pub struct Devres<T>(Arc<DevresInner<T>>); + +impl<T> DevresInner<T> { + fn new(dev: &Device, data: T, flags: Flags) -> Result<Arc<DevresInner<T>>> { + let inner = Arc::pin_init( + pin_init!( DevresInner { + dev: dev.into(), + callback: Self::devres_callback, + data <- Revocable::new(data), + }), + flags, + )?; + + // Convert `Arc<DevresInner>` into a raw pointer and make devres own this reference until + // `Self::devres_callback` is called. + let data = inner.clone().into_raw(); + + // SAFETY: `devm_add_action` guarantees to call `Self::devres_callback` once `dev` is + // detached. + let ret = + unsafe { bindings::devm_add_action(dev.as_raw(), Some(inner.callback), data as _) }; + + if ret != 0 { + // SAFETY: We just created another reference to `inner` in order to pass it to + // `bindings::devm_add_action`. If `bindings::devm_add_action` fails, we have to drop + // this reference accordingly. 
+ let _ = unsafe { Arc::from_raw(data) }; + return Err(Error::from_errno(ret)); + } + + Ok(inner) + } + + fn as_ptr(&self) -> *const Self { + self as _ + } + + fn remove_action(this: &Arc<Self>) { + // SAFETY: + // - `self.inner.dev` is a valid `Device`, + // - the `action` and `data` pointers are the exact same ones as given to devm_add_action() + // previously, + // - `self` is always valid, even if the action has been released already. + let ret = unsafe { + bindings::devm_remove_action_nowarn( + this.dev.as_raw(), + Some(this.callback), + this.as_ptr() as _, + ) + }; + + if ret == 0 { + // SAFETY: We leaked an `Arc` reference to devm_add_action() in `DevresInner::new`; if + // devm_remove_action_nowarn() was successful we can (and have to) claim back ownership + // of this reference. + let _ = unsafe { Arc::from_raw(this.as_ptr()) }; + } + } + + #[allow(clippy::missing_safety_doc)] + unsafe extern "C" fn devres_callback(ptr: *mut kernel::ffi::c_void) { + let ptr = ptr as *mut DevresInner<T>; + // Devres owned this memory; now that we received the callback, drop the `Arc` and hence the + // reference. + // SAFETY: Safe, since we leaked an `Arc` reference to devm_add_action() in + // `DevresInner::new`. + let inner = unsafe { Arc::from_raw(ptr) }; + + inner.data.revoke(); + } +} + +impl<T> Devres<T> { + /// Creates a new [`Devres`] instance of the given `data`. The `data` encapsulated within the + /// returned `Devres` instance' `data` will be revoked once the device is detached. + pub fn new(dev: &Device, data: T, flags: Flags) -> Result<Self> { + let inner = DevresInner::new(dev, data, flags)?; + + Ok(Devres(inner)) + } + + /// Same as [`Devres::new`], but does not return a `Devres` instance. Instead the given `data` + /// is owned by devres and will be revoked / dropped, once the device is detached. + pub fn new_foreign_owned(dev: &Device, data: T, flags: Flags) -> Result { + let _ = DevresInner::new(dev, data, flags)?; + + Ok(()) + } +} + +impl<T> Deref for Devres<T> { + type Target = Revocable<T>; + + fn deref(&self) -> &Self::Target { + &self.0.data + } +} + +impl<T> Drop for Devres<T> { + fn drop(&mut self) { + DevresInner::remove_action(&self.0); + } +} diff --git a/rust/kernel/driver.rs b/rust/kernel/driver.rs new file mode 100644 index 000000000000..2a16d5e64e6c --- /dev/null +++ b/rust/kernel/driver.rs @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Generic support for drivers of different buses (e.g., PCI, Platform, Amba, etc.). +//! +//! Each bus / subsystem is expected to implement [`RegistrationOps`], which allows drivers to +//! register using the [`Registration`] class. + +use crate::error::{Error, Result}; +use crate::{device, init::PinInit, of, str::CStr, try_pin_init, types::Opaque, ThisModule}; +use core::pin::Pin; +use macros::{pin_data, pinned_drop}; + +/// The [`RegistrationOps`] trait serves as generic interface for subsystems (e.g., PCI, Platform, +/// Amba, etc.) to provide the corresponding subsystem specific implementation to register / +/// unregister a driver of the particular type (`RegType`). +/// +/// For instance, the PCI subsystem would set `RegType` to `bindings::pci_driver` and call +/// `bindings::__pci_register_driver` from `RegistrationOps::register` and +/// `bindings::pci_unregister_driver` from `RegistrationOps::unregister`. +/// +/// # Safety +/// +/// A call to [`RegistrationOps::unregister`] for a given instance of `RegType` is only valid if a +/// preceding call to [`RegistrationOps::register`] has been successful. 
+pub unsafe trait RegistrationOps { + /// The type that holds information about the registration. This is typically a struct defined + /// by the C portion of the kernel. + type RegType: Default; + + /// Registers a driver. + /// + /// # Safety + /// + /// On success, `reg` must remain pinned and valid until the matching call to + /// [`RegistrationOps::unregister`]. + unsafe fn register( + reg: &Opaque<Self::RegType>, + name: &'static CStr, + module: &'static ThisModule, + ) -> Result; + + /// Unregisters a driver previously registered with [`RegistrationOps::register`]. + /// + /// # Safety + /// + /// Must only be called after a preceding successful call to [`RegistrationOps::register`] for + /// the same `reg`. + unsafe fn unregister(reg: &Opaque<Self::RegType>); +} + +/// A [`Registration`] is a generic type that represents the registration of some driver type (e.g. +/// `bindings::pci_driver`). Therefore a [`Registration`] must be initialized with a type that +/// implements the [`RegistrationOps`] trait, such that the generic `T::register` and +/// `T::unregister` calls result in the subsystem specific registration calls. +/// +///Once the `Registration` structure is dropped, the driver is unregistered. +#[pin_data(PinnedDrop)] +pub struct Registration<T: RegistrationOps> { + #[pin] + reg: Opaque<T::RegType>, +} + +// SAFETY: `Registration` has no fields or methods accessible via `&Registration`, so it is safe to +// share references to it with multiple threads as nothing can be done. +unsafe impl<T: RegistrationOps> Sync for Registration<T> {} + +// SAFETY: Both registration and unregistration are implemented in C and safe to be performed from +// any thread, so `Registration` is `Send`. +unsafe impl<T: RegistrationOps> Send for Registration<T> {} + +impl<T: RegistrationOps> Registration<T> { + /// Creates a new instance of the registration object. + pub fn new(name: &'static CStr, module: &'static ThisModule) -> impl PinInit<Self, Error> { + try_pin_init!(Self { + reg <- Opaque::try_ffi_init(|ptr: *mut T::RegType| { + // SAFETY: `try_ffi_init` guarantees that `ptr` is valid for write. + unsafe { ptr.write(T::RegType::default()) }; + + // SAFETY: `try_ffi_init` guarantees that `ptr` is valid for write, and it has + // just been initialised above, so it's also valid for read. + let drv = unsafe { &*(ptr as *const Opaque<T::RegType>) }; + + // SAFETY: `drv` is guaranteed to be pinned until `T::unregister`. + unsafe { T::register(drv, name, module) } + }), + }) + } +} + +#[pinned_drop] +impl<T: RegistrationOps> PinnedDrop for Registration<T> { + fn drop(self: Pin<&mut Self>) { + // SAFETY: The existence of `self` guarantees that `self.reg` has previously been + // successfully registered with `T::register` + unsafe { T::unregister(&self.reg) }; + } +} + +/// Declares a kernel module that exposes a single driver. +/// +/// It is meant to be used as a helper by other subsystems so they can more easily expose their own +/// macros. +#[macro_export] +macro_rules! 
module_driver { + (<$gen_type:ident>, $driver_ops:ty, { type: $type:ty, $($f:tt)* }) => { + type Ops<$gen_type> = $driver_ops; + + #[$crate::prelude::pin_data] + struct DriverModule { + #[pin] + _driver: $crate::driver::Registration<Ops<$type>>, + } + + impl $crate::InPlaceModule for DriverModule { + fn init( + module: &'static $crate::ThisModule + ) -> impl $crate::init::PinInit<Self, $crate::error::Error> { + $crate::try_pin_init!(Self { + _driver <- $crate::driver::Registration::new( + <Self as $crate::ModuleMetadata>::NAME, + module, + ), + }) + } + } + + $crate::prelude::module! { + type: DriverModule, + $($f)* + } + } +} + +/// The bus independent adapter to match a drivers and a devices. +/// +/// This trait should be implemented by the bus specific adapter, which represents the connection +/// of a device and a driver. +/// +/// It provides bus independent functions for device / driver interactions. +pub trait Adapter { + /// The type holding driver private data about each device id supported by the driver. + type IdInfo: 'static; + + /// The [`of::IdTable`] of the corresponding driver. + fn of_id_table() -> Option<of::IdTable<Self::IdInfo>>; + + /// Returns the driver's private data from the matching entry in the [`of::IdTable`], if any. + /// + /// If this returns `None`, it means there is no match with an entry in the [`of::IdTable`]. + #[cfg(CONFIG_OF)] + fn of_id_info(dev: &device::Device) -> Option<&'static Self::IdInfo> { + let table = Self::of_id_table()?; + + // SAFETY: + // - `table` has static lifetime, hence it's valid for read, + // - `dev` is guaranteed to be valid while it's alive, and so is `pdev.as_ref().as_raw()`. + let raw_id = unsafe { bindings::of_match_device(table.as_ptr(), dev.as_raw()) }; + + if raw_id.is_null() { + None + } else { + // SAFETY: `DeviceId` is a `#[repr(transparent)` wrapper of `struct of_device_id` and + // does not add additional invariants, so it's safe to transmute. + let id = unsafe { &*raw_id.cast::<of::DeviceId>() }; + + Some(table.info(<of::DeviceId as crate::device_id::RawDeviceId>::index(id))) + } + } + + #[cfg(not(CONFIG_OF))] + #[allow(missing_docs)] + fn of_id_info(_dev: &device::Device) -> Option<&'static Self::IdInfo> { + None + } + + /// Returns the driver's private data from the matching entry of any of the ID tables, if any. + /// + /// If this returns `None`, it means that there is no match in any of the ID tables directly + /// associated with a [`device::Device`]. + fn id_info(dev: &device::Device) -> Option<&'static Self::IdInfo> { + let id = Self::of_id_info(dev); + if id.is_some() { + return id; + } + + None + } +} diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs new file mode 100644 index 000000000000..d4a73e52e3ee --- /dev/null +++ b/rust/kernel/io.rs @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Memory-mapped IO. +//! +//! C header: [`include/asm-generic/io.h`](srctree/include/asm-generic/io.h) + +use crate::error::{code::EINVAL, Result}; +use crate::{bindings, build_assert}; + +/// Raw representation of an MMIO region. +/// +/// By itself, the existence of an instance of this structure does not provide any guarantees that +/// the represented MMIO region does exist or is properly mapped. +/// +/// Instead, the bus specific MMIO implementation must convert this raw representation into an `Io` +/// instance providing the actual memory accessors. Only by the conversion into an `Io` structure +/// any guarantees are given. 
+pub struct IoRaw<const SIZE: usize = 0> { + addr: usize, + maxsize: usize, +} + +impl<const SIZE: usize> IoRaw<SIZE> { + /// Returns a new `IoRaw` instance on success, an error otherwise. + pub fn new(addr: usize, maxsize: usize) -> Result<Self> { + if maxsize < SIZE { + return Err(EINVAL); + } + + Ok(Self { addr, maxsize }) + } + + /// Returns the base address of the MMIO region. + #[inline] + pub fn addr(&self) -> usize { + self.addr + } + + /// Returns the maximum size of the MMIO region. + #[inline] + pub fn maxsize(&self) -> usize { + self.maxsize + } +} + +/// IO-mapped memory, starting at the base address @addr and spanning @maxlen bytes. +/// +/// The creator (usually a subsystem / bus such as PCI) is responsible for creating the +/// mapping, performing an additional region request etc. +/// +/// # Invariant +/// +/// `addr` is the start and `maxsize` the length of valid I/O mapped memory region of size +/// `maxsize`. +/// +/// # Examples +/// +/// ```no_run +/// # use kernel::{bindings, io::{Io, IoRaw}}; +/// # use core::ops::Deref; +/// +/// // See also [`pci::Bar`] for a real example. +/// struct IoMem<const SIZE: usize>(IoRaw<SIZE>); +/// +/// impl<const SIZE: usize> IoMem<SIZE> { +/// /// # Safety +/// /// +/// /// [`paddr`, `paddr` + `SIZE`) must be a valid MMIO region that is mappable into the CPUs +/// /// virtual address space. +/// unsafe fn new(paddr: usize) -> Result<Self>{ +/// // SAFETY: By the safety requirements of this function [`paddr`, `paddr` + `SIZE`) is +/// // valid for `ioremap`. +/// let addr = unsafe { bindings::ioremap(paddr as _, SIZE as _) }; +/// if addr.is_null() { +/// return Err(ENOMEM); +/// } +/// +/// Ok(IoMem(IoRaw::new(addr as _, SIZE)?)) +/// } +/// } +/// +/// impl<const SIZE: usize> Drop for IoMem<SIZE> { +/// fn drop(&mut self) { +/// // SAFETY: `self.0.addr()` is guaranteed to be properly mapped by `Self::new`. +/// unsafe { bindings::iounmap(self.0.addr() as _); }; +/// } +/// } +/// +/// impl<const SIZE: usize> Deref for IoMem<SIZE> { +/// type Target = Io<SIZE>; +/// +/// fn deref(&self) -> &Self::Target { +/// // SAFETY: The memory range stored in `self` has been properly mapped in `Self::new`. +/// unsafe { Io::from_raw(&self.0) } +/// } +/// } +/// +///# fn no_run() -> Result<(), Error> { +/// // SAFETY: Invalid usage for example purposes. +/// let iomem = unsafe { IoMem::<{ core::mem::size_of::<u32>() }>::new(0xBAAAAAAD)? }; +/// iomem.writel(0x42, 0x0); +/// assert!(iomem.try_writel(0x42, 0x0).is_ok()); +/// assert!(iomem.try_writel(0x42, 0x4).is_err()); +/// # Ok(()) +/// # } +/// ``` +#[repr(transparent)] +pub struct Io<const SIZE: usize = 0>(IoRaw<SIZE>); + +macro_rules! define_read { + ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => { + /// Read IO data from a given offset known at compile time. + /// + /// Bound checks are performed on compile time, hence if the offset is not known at compile + /// time, the build will fail. + $(#[$attr])* + #[inline] + pub fn $name(&self, offset: usize) -> $type_name { + let addr = self.io_addr_assert::<$type_name>(offset); + + // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. + unsafe { bindings::$name(addr as _) } + } + + /// Read IO data from a given offset. + /// + /// Bound checks are performed on runtime, it fails if the offset (plus the type size) is + /// out of bounds. 
+ $(#[$attr])* + pub fn $try_name(&self, offset: usize) -> Result<$type_name> { + let addr = self.io_addr::<$type_name>(offset)?; + + // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. + Ok(unsafe { bindings::$name(addr as _) }) + } + }; +} + +macro_rules! define_write { + ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => { + /// Write IO data from a given offset known at compile time. + /// + /// Bound checks are performed on compile time, hence if the offset is not known at compile + /// time, the build will fail. + $(#[$attr])* + #[inline] + pub fn $name(&self, value: $type_name, offset: usize) { + let addr = self.io_addr_assert::<$type_name>(offset); + + // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. + unsafe { bindings::$name(value, addr as _, ) } + } + + /// Write IO data from a given offset. + /// + /// Bound checks are performed on runtime, it fails if the offset (plus the type size) is + /// out of bounds. + $(#[$attr])* + pub fn $try_name(&self, value: $type_name, offset: usize) -> Result { + let addr = self.io_addr::<$type_name>(offset)?; + + // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. + unsafe { bindings::$name(value, addr as _) } + Ok(()) + } + }; +} + +impl<const SIZE: usize> Io<SIZE> { + /// Converts an `IoRaw` into an `Io` instance, providing the accessors to the MMIO mapping. + /// + /// # Safety + /// + /// Callers must ensure that `addr` is the start of a valid I/O mapped memory region of size + /// `maxsize`. + pub unsafe fn from_raw(raw: &IoRaw<SIZE>) -> &Self { + // SAFETY: `Io` is a transparent wrapper around `IoRaw`. + unsafe { &*core::ptr::from_ref(raw).cast() } + } + + /// Returns the base address of this mapping. + #[inline] + pub fn addr(&self) -> usize { + self.0.addr() + } + + /// Returns the maximum size of this mapping. + #[inline] + pub fn maxsize(&self) -> usize { + self.0.maxsize() + } + + #[inline] + const fn offset_valid<U>(offset: usize, size: usize) -> bool { + let type_size = core::mem::size_of::<U>(); + if let Some(end) = offset.checked_add(type_size) { + end <= size && offset % type_size == 0 + } else { + false + } + } + + #[inline] + fn io_addr<U>(&self, offset: usize) -> Result<usize> { + if !Self::offset_valid::<U>(offset, self.maxsize()) { + return Err(EINVAL); + } + + // Probably no need to check, since the safety requirements of `Self::new` guarantee that + // this can't overflow. 
+ self.addr().checked_add(offset).ok_or(EINVAL) + } + + #[inline] + fn io_addr_assert<U>(&self, offset: usize) -> usize { + build_assert!(Self::offset_valid::<U>(offset, SIZE)); + + self.addr() + offset + } + + define_read!(readb, try_readb, u8); + define_read!(readw, try_readw, u16); + define_read!(readl, try_readl, u32); + define_read!( + #[cfg(CONFIG_64BIT)] + readq, + try_readq, + u64 + ); + + define_read!(readb_relaxed, try_readb_relaxed, u8); + define_read!(readw_relaxed, try_readw_relaxed, u16); + define_read!(readl_relaxed, try_readl_relaxed, u32); + define_read!( + #[cfg(CONFIG_64BIT)] + readq_relaxed, + try_readq_relaxed, + u64 + ); + + define_write!(writeb, try_writeb, u8); + define_write!(writew, try_writew, u16); + define_write!(writel, try_writel, u32); + define_write!( + #[cfg(CONFIG_64BIT)] + writeq, + try_writeq, + u64 + ); + + define_write!(writeb_relaxed, try_writeb_relaxed, u8); + define_write!(writew_relaxed, try_writew_relaxed, u16); + define_write!(writel_relaxed, try_writel_relaxed, u32); + define_write!( + #[cfg(CONFIG_64BIT)] + writeq_relaxed, + try_writeq_relaxed, + u64 + ); +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 545d1170ee63..496ed32b0911 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -19,6 +19,11 @@ #![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(unsize))] #![feature(inline_const)] #![feature(lint_reasons)] +// Stable in Rust 1.83 +#![feature(const_maybe_uninit_as_mut_ptr)] +#![feature(const_mut_refs)] +#![feature(const_ptr_write)] +#![feature(const_refs_to_cell)] // Ensure conditional compilation based on the kernel configuration works; // otherwise we may silently break things like initcall handling. @@ -37,11 +42,15 @@ pub mod block; pub mod build_assert; pub mod cred; pub mod device; +pub mod device_id; +pub mod devres; +pub mod driver; pub mod error; #[cfg(CONFIG_RUST_FW_LOADER_ABSTRACTIONS)] pub mod firmware; pub mod fs; pub mod init; +pub mod io; pub mod ioctl; pub mod jump_label; #[cfg(CONFIG_KUNIT)] @@ -50,11 +59,16 @@ pub mod list; pub mod miscdevice; #[cfg(CONFIG_NET)] pub mod net; +pub mod of; pub mod page; +#[cfg(CONFIG_PCI)] +pub mod pci; pub mod pid_namespace; +pub mod platform; pub mod prelude; pub mod print; pub mod rbtree; +pub mod revocable; pub mod security; pub mod seq_file; pub mod sizes; @@ -115,6 +129,12 @@ impl<T: Module> InPlaceModule for T { } } +/// Metadata attached to a [`Module`] or [`InPlaceModule`]. +pub trait ModuleMetadata { + /// The name of the module as specified in the `module!` macro. + const NAME: &'static crate::str::CStr; +} + /// Equivalent to `THIS_MODULE` in the C API. /// /// C header: [`include/linux/init.h`](srctree/include/linux/init.h) diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index b3a6cc50b240..e14433b2ab9d 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -10,9 +10,12 @@ use crate::{ bindings, + device::Device, error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR}, ffi::{c_int, c_long, c_uint, c_ulong}, + fs::File, prelude::*, + seq_file::SeqFile, str::CStr, types::{ForeignOwnable, Opaque}, }; @@ -80,6 +83,16 @@ impl<T: MiscDevice> MiscDeviceRegistration<T> { pub fn as_raw(&self) -> *mut bindings::miscdevice { self.inner.get() } + + /// Access the `this_device` field. + pub fn device(&self) -> &Device { + // SAFETY: This can only be called after a successful register(), which always + // initialises `this_device` with a valid device. 
Furthermore, the signature of this + // function tells the borrow-checker that the `&Device` reference must not outlive the + // `&MiscDeviceRegistration<T>` used to obtain it, so the last use of the reference must be + // before the underlying `struct miscdevice` is destroyed. + unsafe { Device::as_ref((*self.as_raw()).this_device) } + } } #[pinned_drop] @@ -92,17 +105,17 @@ impl<T> PinnedDrop for MiscDeviceRegistration<T> { /// Trait implemented by the private data of an open misc device. #[vtable] -pub trait MiscDevice { +pub trait MiscDevice: Sized { /// What kind of pointer should `Self` be wrapped in. type Ptr: ForeignOwnable + Send + Sync; /// Called when the misc device is opened. /// /// The returned pointer will be stored as the private data for the file. - fn open() -> Result<Self::Ptr>; + fn open(_file: &File, _misc: &MiscDeviceRegistration<Self>) -> Result<Self::Ptr>; /// Called when the misc device is released. - fn release(device: Self::Ptr) { + fn release(device: Self::Ptr, _file: &File) { drop(device); } @@ -113,6 +126,7 @@ pub trait MiscDevice { /// [`kernel::ioctl`]: mod@crate::ioctl fn ioctl( _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>, + _file: &File, _cmd: u32, _arg: usize, ) -> Result<isize> { @@ -129,11 +143,21 @@ pub trait MiscDevice { #[cfg(CONFIG_COMPAT)] fn compat_ioctl( _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>, + _file: &File, _cmd: u32, _arg: usize, ) -> Result<isize> { build_error!(VTABLE_DEFAULT_ERROR) } + + /// Show info for this fd. + fn show_fdinfo( + _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>, + _m: &SeqFile, + _file: &File, + ) { + build_error!(VTABLE_DEFAULT_ERROR) + } } const fn create_vtable<T: MiscDevice>() -> &'static bindings::file_operations { @@ -161,6 +185,7 @@ const fn create_vtable<T: MiscDevice>() -> &'static bindings::file_operations { } else { None }, + show_fdinfo: maybe_fn(T::HAS_SHOW_FDINFO, fops_show_fdinfo::<T>), // SAFETY: All zeros is a valid value for `bindings::file_operations`. ..unsafe { MaybeUninit::zeroed().assume_init() } }; @@ -175,21 +200,38 @@ const fn create_vtable<T: MiscDevice>() -> &'static bindings::file_operations { /// The file must be associated with a `MiscDeviceRegistration<T>`. unsafe extern "C" fn fops_open<T: MiscDevice>( inode: *mut bindings::inode, - file: *mut bindings::file, + raw_file: *mut bindings::file, ) -> c_int { // SAFETY: The pointers are valid and for a file being opened. - let ret = unsafe { bindings::generic_file_open(inode, file) }; + let ret = unsafe { bindings::generic_file_open(inode, raw_file) }; if ret != 0 { return ret; } - let ptr = match T::open() { + // SAFETY: The open call of a file can access the private data. + let misc_ptr = unsafe { (*raw_file).private_data }; + + // SAFETY: This is a miscdevice, so `misc_open()` set the private data to a pointer to the + // associated `struct miscdevice` before calling into this method. Furthermore, `misc_open()` + // ensures that the miscdevice can't be unregistered and freed during this call to `fops_open`. + let misc = unsafe { &*misc_ptr.cast::<MiscDeviceRegistration<T>>() }; + + // SAFETY: + // * This underlying file is valid for (much longer than) the duration of `T::open`. + // * There is no active fdget_pos region on the file on this thread. + let file = unsafe { File::from_raw_file(raw_file) }; + + let ptr = match T::open(file, misc) { Ok(ptr) => ptr, Err(err) => return err.to_errno(), }; - // SAFETY: The open call of a file owns the private data. 
- unsafe { (*file).private_data = ptr.into_foreign() }; + // This overwrites the private data with the value specified by the user, changing the type of + // this file's private data. All future accesses to the private data is performed by other + // fops_* methods in this file, which all correctly cast the private data to the new type. + // + // SAFETY: The open call of a file can access the private data. + unsafe { (*raw_file).private_data = ptr.into_foreign() }; 0 } @@ -207,7 +249,10 @@ unsafe extern "C" fn fops_release<T: MiscDevice>( // SAFETY: The release call of a file owns the private data. let ptr = unsafe { <T::Ptr as ForeignOwnable>::from_foreign(private) }; - T::release(ptr); + // SAFETY: + // * The file is valid for the duration of this call. + // * There is no active fdget_pos region on the file on this thread. + T::release(ptr, unsafe { File::from_raw_file(file) }); 0 } @@ -225,7 +270,12 @@ unsafe extern "C" fn fops_ioctl<T: MiscDevice>( // SAFETY: Ioctl calls can borrow the private data of the file. let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) }; - match T::ioctl(device, cmd, arg) { + // SAFETY: + // * The file is valid for the duration of this call. + // * There is no active fdget_pos region on the file on this thread. + let file = unsafe { File::from_raw_file(file) }; + + match T::ioctl(device, file, cmd, arg) { Ok(ret) => ret as c_long, Err(err) => err.to_errno() as c_long, } @@ -245,8 +295,36 @@ unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>( // SAFETY: Ioctl calls can borrow the private data of the file. let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) }; - match T::compat_ioctl(device, cmd, arg) { + // SAFETY: + // * The file is valid for the duration of this call. + // * There is no active fdget_pos region on the file on this thread. + let file = unsafe { File::from_raw_file(file) }; + + match T::compat_ioctl(device, file, cmd, arg) { Ok(ret) => ret as c_long, Err(err) => err.to_errno() as c_long, } } + +/// # Safety +/// +/// - `file` must be a valid file that is associated with a `MiscDeviceRegistration<T>`. +/// - `seq_file` must be a valid `struct seq_file` that we can write to. +unsafe extern "C" fn fops_show_fdinfo<T: MiscDevice>( + seq_file: *mut bindings::seq_file, + file: *mut bindings::file, +) { + // SAFETY: The release call of a file owns the private data. + let private = unsafe { (*file).private_data }; + // SAFETY: Ioctl calls can borrow the private data of the file. + let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) }; + // SAFETY: + // * The file is valid for the duration of this call. + // * There is no active fdget_pos region on the file on this thread. + let file = unsafe { File::from_raw_file(file) }; + // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which + // this method is called. + let m = unsafe { SeqFile::from_raw(seq_file) }; + + T::show_fdinfo(device, m, file); +} diff --git a/rust/kernel/of.rs b/rust/kernel/of.rs new file mode 100644 index 000000000000..04f2d8ef29cb --- /dev/null +++ b/rust/kernel/of.rs @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Device Tree / Open Firmware abstractions. + +use crate::{bindings, device_id::RawDeviceId, prelude::*}; + +/// IdTable type for OF drivers. +pub type IdTable<T> = &'static dyn kernel::device_id::IdTable<DeviceId, T>; + +/// An open firmware device id. 
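For illustration, a rough sketch of a driver written against the reworked miscdevice callbacks above; the `Sample*` names are hypothetical, and the imports outside this hunk (e.g. `seq_print!`, the `ForeignOwnable` impl for `KBox`) are assumptions about the surrounding kernel crate rather than something this patch adds.

    use kernel::prelude::*;
    use kernel::{
        fs::File,
        miscdevice::{MiscDevice, MiscDeviceRegistration},
        seq_file::SeqFile,
        types::ForeignOwnable,
    };

    struct SampleData {
        opens: u64,
    }

    struct SampleDevice;

    #[vtable]
    impl MiscDevice for SampleDevice {
        type Ptr = KBox<SampleData>;

        fn open(_file: &File, misc: &MiscDeviceRegistration<Self>) -> Result<Self::Ptr> {
            // The registration now also exposes the underlying `struct device`.
            let _dev = misc.device();
            Ok(KBox::new(SampleData { opens: 1 }, GFP_KERNEL)?)
        }

        fn show_fdinfo(
            data: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
            m: &SeqFile,
            _file: &File,
        ) {
            // New callback: contributes to /proc/<pid>/fdinfo/<fd> for this fd.
            seq_print!(m, "opens:\t{}\n", data.opens);
        }
    }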
+#[repr(transparent)] +#[derive(Clone, Copy)] +pub struct DeviceId(bindings::of_device_id); + +// SAFETY: +// * `DeviceId` is a `#[repr(transparent)` wrapper of `struct of_device_id` and does not add +// additional invariants, so it's safe to transmute to `RawType`. +// * `DRIVER_DATA_OFFSET` is the offset to the `data` field. +unsafe impl RawDeviceId for DeviceId { + type RawType = bindings::of_device_id; + + const DRIVER_DATA_OFFSET: usize = core::mem::offset_of!(bindings::of_device_id, data); + + fn index(&self) -> usize { + self.0.data as _ + } +} + +impl DeviceId { + /// Create a new device id from an OF 'compatible' string. + pub const fn new(compatible: &'static CStr) -> Self { + let src = compatible.as_bytes_with_nul(); + // Replace with `bindings::of_device_id::default()` once stabilized for `const`. + // SAFETY: FFI type is valid to be zero-initialized. + let mut of: bindings::of_device_id = unsafe { core::mem::zeroed() }; + + // TODO: Use `clone_from_slice` once the corresponding types do match. + let mut i = 0; + while i < src.len() { + of.compatible[i] = src[i] as _; + i += 1; + } + + Self(of) + } +} + +/// Create an OF `IdTable` with an "alias" for modpost. +#[macro_export] +macro_rules! of_device_table { + ($table_name:ident, $module_table_name:ident, $id_info_type: ty, $table_data: expr) => { + const $table_name: $crate::device_id::IdArray< + $crate::of::DeviceId, + $id_info_type, + { $table_data.len() }, + > = $crate::device_id::IdArray::new($table_data); + + $crate::module_device_table!("of", $module_table_name, $table_name); + }; +} diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs new file mode 100644 index 000000000000..4c98b5b9aa1e --- /dev/null +++ b/rust/kernel/pci.rs @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Abstractions for the PCI bus. +//! +//! C header: [`include/linux/pci.h`](srctree/include/linux/pci.h) + +use crate::{ + alloc::flags::*, + bindings, container_of, device, + device_id::RawDeviceId, + devres::Devres, + driver, + error::{to_result, Result}, + io::Io, + io::IoRaw, + str::CStr, + types::{ARef, ForeignOwnable, Opaque}, + ThisModule, +}; +use core::{ops::Deref, ptr::addr_of_mut}; +use kernel::prelude::*; + +/// An adapter for the registration of PCI drivers. +pub struct Adapter<T: Driver>(T); + +// SAFETY: A call to `unregister` for a given instance of `RegType` is guaranteed to be valid if +// a preceding call to `register` has been successful. +unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> { + type RegType = bindings::pci_driver; + + unsafe fn register( + pdrv: &Opaque<Self::RegType>, + name: &'static CStr, + module: &'static ThisModule, + ) -> Result { + // SAFETY: It's safe to set the fields of `struct pci_driver` on initialization. + unsafe { + (*pdrv.get()).name = name.as_char_ptr(); + (*pdrv.get()).probe = Some(Self::probe_callback); + (*pdrv.get()).remove = Some(Self::remove_callback); + (*pdrv.get()).id_table = T::ID_TABLE.as_ptr(); + } + + // SAFETY: `pdrv` is guaranteed to be a valid `RegType`. + to_result(unsafe { + bindings::__pci_register_driver(pdrv.get(), module.0, name.as_char_ptr()) + }) + } + + unsafe fn unregister(pdrv: &Opaque<Self::RegType>) { + // SAFETY: `pdrv` is guaranteed to be a valid `RegType`. 
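A minimal sketch of how a driver would instantiate the `of_device_table!` macro defined above; the compatible strings and the `u32` per-entry data are made up for illustration.

    use kernel::{c_str, of};

    kernel::of_device_table!(
        SAMPLE_OF_TABLE,        // `IdArray` constant, usable as an `of::IdTable`
        MODULE_SAMPLE_OF_TABLE, // zero-terminated alias emitted for modpost
        u32,                    // driver-private data stored per entry
        [
            (of::DeviceId::new(c_str!("test,device-a")), 0u32),
            (of::DeviceId::new(c_str!("test,device-b")), 1u32),
        ]
    );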
+ unsafe { bindings::pci_unregister_driver(pdrv.get()) } + } +} + +impl<T: Driver + 'static> Adapter<T> { + extern "C" fn probe_callback( + pdev: *mut bindings::pci_dev, + id: *const bindings::pci_device_id, + ) -> kernel::ffi::c_int { + // SAFETY: The PCI bus only ever calls the probe callback with a valid pointer to a + // `struct pci_dev`. + let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) }; + // SAFETY: `dev` is guaranteed to be embedded in a valid `struct pci_dev` by the call + // above. + let mut pdev = unsafe { Device::from_dev(dev) }; + + // SAFETY: `DeviceId` is a `#[repr(transparent)` wrapper of `struct pci_device_id` and + // does not add additional invariants, so it's safe to transmute. + let id = unsafe { &*id.cast::<DeviceId>() }; + let info = T::ID_TABLE.info(id.index()); + + match T::probe(&mut pdev, info) { + Ok(data) => { + // Let the `struct pci_dev` own a reference of the driver's private data. + // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a + // `struct pci_dev`. + unsafe { bindings::pci_set_drvdata(pdev.as_raw(), data.into_foreign() as _) }; + } + Err(err) => return Error::to_errno(err), + } + + 0 + } + + extern "C" fn remove_callback(pdev: *mut bindings::pci_dev) { + // SAFETY: The PCI bus only ever calls the remove callback with a valid pointer to a + // `struct pci_dev`. + let ptr = unsafe { bindings::pci_get_drvdata(pdev) }; + + // SAFETY: `remove_callback` is only ever called after a successful call to + // `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized + // `KBox<T>` pointer created through `KBox::into_foreign`. + let _ = unsafe { KBox::<T>::from_foreign(ptr) }; + } +} + +/// Declares a kernel module that exposes a single PCI driver. +/// +/// # Example +/// +///```ignore +/// kernel::module_pci_driver! { +/// type: MyDriver, +/// name: "Module name", +/// author: "Author name", +/// description: "Description", +/// license: "GPL v2", +/// } +///``` +#[macro_export] +macro_rules! module_pci_driver { +($($f:tt)*) => { + $crate::module_driver!(<T>, $crate::pci::Adapter<T>, { $($f)* }); +}; +} + +/// Abstraction for bindings::pci_device_id. +#[repr(transparent)] +#[derive(Clone, Copy)] +pub struct DeviceId(bindings::pci_device_id); + +impl DeviceId { + const PCI_ANY_ID: u32 = !0; + + /// Equivalent to C's `PCI_DEVICE` macro. + /// + /// Create a new `pci::DeviceId` from a vendor and device ID number. + pub const fn from_id(vendor: u32, device: u32) -> Self { + Self(bindings::pci_device_id { + vendor, + device, + subvendor: DeviceId::PCI_ANY_ID, + subdevice: DeviceId::PCI_ANY_ID, + class: 0, + class_mask: 0, + driver_data: 0, + override_only: 0, + }) + } + + /// Equivalent to C's `PCI_DEVICE_CLASS` macro. + /// + /// Create a new `pci::DeviceId` from a class number and mask. + pub const fn from_class(class: u32, class_mask: u32) -> Self { + Self(bindings::pci_device_id { + vendor: DeviceId::PCI_ANY_ID, + device: DeviceId::PCI_ANY_ID, + subvendor: DeviceId::PCI_ANY_ID, + subdevice: DeviceId::PCI_ANY_ID, + class, + class_mask, + driver_data: 0, + override_only: 0, + }) + } +} + +// SAFETY: +// * `DeviceId` is a `#[repr(transparent)` wrapper of `pci_device_id` and does not add +// additional invariants, so it's safe to transmute to `RawType`. +// * `DRIVER_DATA_OFFSET` is the offset to the `driver_data` field. 
+unsafe impl RawDeviceId for DeviceId { + type RawType = bindings::pci_device_id; + + const DRIVER_DATA_OFFSET: usize = core::mem::offset_of!(bindings::pci_device_id, driver_data); + + fn index(&self) -> usize { + self.0.driver_data as _ + } +} + +/// IdTable type for PCI +pub type IdTable<T> = &'static dyn kernel::device_id::IdTable<DeviceId, T>; + +/// Create a PCI `IdTable` with its alias for modpost. +#[macro_export] +macro_rules! pci_device_table { + ($table_name:ident, $module_table_name:ident, $id_info_type: ty, $table_data: expr) => { + const $table_name: $crate::device_id::IdArray< + $crate::pci::DeviceId, + $id_info_type, + { $table_data.len() }, + > = $crate::device_id::IdArray::new($table_data); + + $crate::module_device_table!("pci", $module_table_name, $table_name); + }; +} + +/// The PCI driver trait. +/// +/// # Example +/// +///``` +/// # use kernel::{bindings, pci}; +/// +/// struct MyDriver; +/// +/// kernel::pci_device_table!( +/// PCI_TABLE, +/// MODULE_PCI_TABLE, +/// <MyDriver as pci::Driver>::IdInfo, +/// [ +/// (pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, bindings::PCI_ANY_ID as _), ()) +/// ] +/// ); +/// +/// impl pci::Driver for MyDriver { +/// type IdInfo = (); +/// const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE; +/// +/// fn probe( +/// _pdev: &mut pci::Device, +/// _id_info: &Self::IdInfo, +/// ) -> Result<Pin<KBox<Self>>> { +/// Err(ENODEV) +/// } +/// } +///``` +/// Drivers must implement this trait in order to get a PCI driver registered. Please refer to the +/// `Adapter` documentation for an example. +pub trait Driver { + /// The type holding information about each device id supported by the driver. + /// + /// TODO: Use associated_type_defaults once stabilized: + /// + /// type IdInfo: 'static = (); + type IdInfo: 'static; + + /// The table of device ids supported by the driver. + const ID_TABLE: IdTable<Self::IdInfo>; + + /// PCI driver probe. + /// + /// Called when a new platform device is added or discovered. + /// Implementers should attempt to initialize the device here. + fn probe(dev: &mut Device, id_info: &Self::IdInfo) -> Result<Pin<KBox<Self>>>; +} + +/// The PCI device representation. +/// +/// A PCI device is based on an always reference counted `device:Device` instance. Cloning a PCI +/// device, hence, also increments the base device' reference count. +/// +/// # Invariants +/// +/// `Device` hold a valid reference of `ARef<device::Device>` whose underlying `struct device` is a +/// member of a `struct pci_dev`. +#[derive(Clone)] +pub struct Device(ARef<device::Device>); + +/// A PCI BAR to perform I/O-Operations on. +/// +/// # Invariants +/// +/// `Bar` always holds an `IoRaw` inststance that holds a valid pointer to the start of the I/O +/// memory mapped PCI bar and its size. +pub struct Bar<const SIZE: usize = 0> { + pdev: Device, + io: IoRaw<SIZE>, + num: i32, +} + +impl<const SIZE: usize> Bar<SIZE> { + fn new(pdev: Device, num: u32, name: &CStr) -> Result<Self> { + let len = pdev.resource_len(num)?; + if len == 0 { + return Err(ENOMEM); + } + + // Convert to `i32`, since that's what all the C bindings use. + let num = i32::try_from(num)?; + + // SAFETY: + // `pdev` is valid by the invariants of `Device`. + // `num` is checked for validity by a previous call to `Device::resource_len`. + // `name` is always valid. + let ret = unsafe { bindings::pci_request_region(pdev.as_raw(), num, name.as_char_ptr()) }; + if ret != 0 { + return Err(EBUSY); + } + + // SAFETY: + // `pdev` is valid by the invariants of `Device`. 
+ // `num` is checked for validity by a previous call to `Device::resource_len`. + // `name` is always valid. + let ioptr: usize = unsafe { bindings::pci_iomap(pdev.as_raw(), num, 0) } as usize; + if ioptr == 0 { + // SAFETY: + // `pdev` valid by the invariants of `Device`. + // `num` is checked for validity by a previous call to `Device::resource_len`. + unsafe { bindings::pci_release_region(pdev.as_raw(), num) }; + return Err(ENOMEM); + } + + let io = match IoRaw::new(ioptr, len as usize) { + Ok(io) => io, + Err(err) => { + // SAFETY: + // `pdev` is valid by the invariants of `Device`. + // `ioptr` is guaranteed to be the start of a valid I/O mapped memory region. + // `num` is checked for validity by a previous call to `Device::resource_len`. + unsafe { Self::do_release(&pdev, ioptr, num) }; + return Err(err); + } + }; + + Ok(Bar { pdev, io, num }) + } + + /// # Safety + /// + /// `ioptr` must be a valid pointer to the memory mapped PCI bar number `num`. + unsafe fn do_release(pdev: &Device, ioptr: usize, num: i32) { + // SAFETY: + // `pdev` is valid by the invariants of `Device`. + // `ioptr` is valid by the safety requirements. + // `num` is valid by the safety requirements. + unsafe { + bindings::pci_iounmap(pdev.as_raw(), ioptr as _); + bindings::pci_release_region(pdev.as_raw(), num); + } + } + + fn release(&self) { + // SAFETY: The safety requirements are guaranteed by the type invariant of `self.pdev`. + unsafe { Self::do_release(&self.pdev, self.io.addr(), self.num) }; + } +} + +impl Bar { + fn index_is_valid(index: u32) -> bool { + // A `struct pci_dev` owns an array of resources with at most `PCI_NUM_RESOURCES` entries. + index < bindings::PCI_NUM_RESOURCES + } +} + +impl<const SIZE: usize> Drop for Bar<SIZE> { + fn drop(&mut self) { + self.release(); + } +} + +impl<const SIZE: usize> Deref for Bar<SIZE> { + type Target = Io<SIZE>; + + fn deref(&self) -> &Self::Target { + // SAFETY: By the type invariant of `Self`, the MMIO range in `self.io` is properly mapped. + unsafe { Io::from_raw(&self.io) } + } +} + +impl Device { + /// Create a PCI Device instance from an existing `device::Device`. + /// + /// # Safety + /// + /// `dev` must be an `ARef<device::Device>` whose underlying `bindings::device` is a member of + /// a `bindings::pci_dev`. + pub unsafe fn from_dev(dev: ARef<device::Device>) -> Self { + Self(dev) + } + + fn as_raw(&self) -> *mut bindings::pci_dev { + // SAFETY: By the type invariant `self.0.as_raw` is a pointer to the `struct device` + // embedded in `struct pci_dev`. + unsafe { container_of!(self.0.as_raw(), bindings::pci_dev, dev) as _ } + } + + /// Returns the PCI vendor ID. + pub fn vendor_id(&self) -> u16 { + // SAFETY: `self.as_raw` is a valid pointer to a `struct pci_dev`. + unsafe { (*self.as_raw()).vendor } + } + + /// Returns the PCI device ID. + pub fn device_id(&self) -> u16 { + // SAFETY: `self.as_raw` is a valid pointer to a `struct pci_dev`. + unsafe { (*self.as_raw()).device } + } + + /// Enable memory resources for this device. + pub fn enable_device_mem(&self) -> Result { + // SAFETY: `self.as_raw` is guaranteed to be a pointer to a valid `struct pci_dev`. + let ret = unsafe { bindings::pci_enable_device_mem(self.as_raw()) }; + if ret != 0 { + Err(Error::from_errno(ret)) + } else { + Ok(()) + } + } + + /// Enable bus-mastering for this device. + pub fn set_master(&self) { + // SAFETY: `self.as_raw` is guaranteed to be a pointer to a valid `struct pci_dev`. 
+ unsafe { bindings::pci_set_master(self.as_raw()) }; + } + + /// Returns the size of the given PCI bar resource. + pub fn resource_len(&self, bar: u32) -> Result<bindings::resource_size_t> { + if !Bar::index_is_valid(bar) { + return Err(EINVAL); + } + + // SAFETY: + // - `bar` is a valid bar number, as guaranteed by the above call to `Bar::index_is_valid`, + // - by its type invariant `self.as_raw` is always a valid pointer to a `struct pci_dev`. + Ok(unsafe { bindings::pci_resource_len(self.as_raw(), bar.try_into()?) }) + } + + /// Mapps an entire PCI-BAR after performing a region-request on it. I/O operation bound checks + /// can be performed on compile time for offsets (plus the requested type size) < SIZE. + pub fn iomap_region_sized<const SIZE: usize>( + &self, + bar: u32, + name: &CStr, + ) -> Result<Devres<Bar<SIZE>>> { + let bar = Bar::<SIZE>::new(self.clone(), bar, name)?; + let devres = Devres::new(self.as_ref(), bar, GFP_KERNEL)?; + + Ok(devres) + } + + /// Mapps an entire PCI-BAR after performing a region-request on it. + pub fn iomap_region(&self, bar: u32, name: &CStr) -> Result<Devres<Bar>> { + self.iomap_region_sized::<0>(bar, name) + } +} + +impl AsRef<device::Device> for Device { + fn as_ref(&self) -> &device::Device { + &self.0 + } +} diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs new file mode 100644 index 000000000000..50e6b0421813 --- /dev/null +++ b/rust/kernel/platform.rs @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Abstractions for the platform bus. +//! +//! C header: [`include/linux/platform_device.h`](srctree/include/linux/platform_device.h) + +use crate::{ + bindings, container_of, device, driver, + error::{to_result, Result}, + of, + prelude::*, + str::CStr, + types::{ARef, ForeignOwnable, Opaque}, + ThisModule, +}; + +use core::ptr::addr_of_mut; + +/// An adapter for the registration of platform drivers. +pub struct Adapter<T: Driver>(T); + +// SAFETY: A call to `unregister` for a given instance of `RegType` is guaranteed to be valid if +// a preceding call to `register` has been successful. +unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> { + type RegType = bindings::platform_driver; + + unsafe fn register( + pdrv: &Opaque<Self::RegType>, + name: &'static CStr, + module: &'static ThisModule, + ) -> Result { + let of_table = match T::OF_ID_TABLE { + Some(table) => table.as_ptr(), + None => core::ptr::null(), + }; + + // SAFETY: It's safe to set the fields of `struct platform_driver` on initialization. + unsafe { + (*pdrv.get()).driver.name = name.as_char_ptr(); + (*pdrv.get()).probe = Some(Self::probe_callback); + (*pdrv.get()).remove = Some(Self::remove_callback); + (*pdrv.get()).driver.of_match_table = of_table; + } + + // SAFETY: `pdrv` is guaranteed to be a valid `RegType`. + to_result(unsafe { bindings::__platform_driver_register(pdrv.get(), module.0) }) + } + + unsafe fn unregister(pdrv: &Opaque<Self::RegType>) { + // SAFETY: `pdrv` is guaranteed to be a valid `RegType`. + unsafe { bindings::platform_driver_unregister(pdrv.get()) }; + } +} + +impl<T: Driver + 'static> Adapter<T> { + extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ffi::c_int { + // SAFETY: The platform bus only ever calls the probe callback with a valid `pdev`. + let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) }; + // SAFETY: `dev` is guaranteed to be embedded in a valid `struct platform_device` by the + // call above. 
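Because `iomap_region_sized::<SIZE>()` encodes the BAR size in the `Bar` type, accesses at offsets known at build time can be rejected at compile time, while offsets only known at run time must go through the fallible `try_*` accessors; the pci-testdev sample later in this patch relies on exactly that split. A rough sketch, assuming a hypothetical 16-byte register window (the register layout and the `my_driver` region name are invented for illustration):

```
use kernel::{c_str, devres::Devres, pci, prelude::*};

// Hypothetical register layout of a 16-byte MMIO window.
const REG_CTRL: usize = 0x0;
const REG_STATUS: usize = 0x4;
const WINDOW: usize = 0x10;

fn map_and_poke(pdev: &pci::Device) -> Result<Devres<pci::Bar<WINDOW>>> {
    pdev.enable_device_mem()?;
    pdev.set_master();

    // Request and map BAR 0; `Devres` ties the mapping's lifetime to the device.
    let bar = pdev.iomap_region_sized::<WINDOW>(0, c_str!("my_driver"))?;

    {
        // Access is revocable: it fails once the device is unbound.
        let io = bar.try_access().ok_or(ENXIO)?;

        // Constant offsets below `WINDOW` are checked at compile time.
        io.writeb(1, REG_CTRL);
        let _status = io.readl(REG_STATUS);

        // Offsets only known at run time must use the fallible accessors.
        let offset = io.readl(REG_CTRL) as usize;
        io.try_writeb(0, offset)?;
    }

    Ok(bar)
}
```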
+ let mut pdev = unsafe { Device::from_dev(dev) }; + + let info = <Self as driver::Adapter>::id_info(pdev.as_ref()); + match T::probe(&mut pdev, info) { + Ok(data) => { + // Let the `struct platform_device` own a reference of the driver's private data. + // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a + // `struct platform_device`. + unsafe { bindings::platform_set_drvdata(pdev.as_raw(), data.into_foreign() as _) }; + } + Err(err) => return Error::to_errno(err), + } + + 0 + } + + extern "C" fn remove_callback(pdev: *mut bindings::platform_device) { + // SAFETY: `pdev` is a valid pointer to a `struct platform_device`. + let ptr = unsafe { bindings::platform_get_drvdata(pdev) }; + + // SAFETY: `remove_callback` is only ever called after a successful call to + // `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized + // `KBox<T>` pointer created through `KBox::into_foreign`. + let _ = unsafe { KBox::<T>::from_foreign(ptr) }; + } +} + +impl<T: Driver + 'static> driver::Adapter for Adapter<T> { + type IdInfo = T::IdInfo; + + fn of_id_table() -> Option<of::IdTable<Self::IdInfo>> { + T::OF_ID_TABLE + } +} + +/// Declares a kernel module that exposes a single platform driver. +/// +/// # Examples +/// +/// ```ignore +/// kernel::module_platform_driver! { +/// type: MyDriver, +/// name: "Module name", +/// author: "Author name", +/// description: "Description", +/// license: "GPL v2", +/// } +/// ``` +#[macro_export] +macro_rules! module_platform_driver { + ($($f:tt)*) => { + $crate::module_driver!(<T>, $crate::platform::Adapter<T>, { $($f)* }); + }; +} + +/// The platform driver trait. +/// +/// Drivers must implement this trait in order to get a platform driver registered. +/// +/// # Example +/// +///``` +/// # use kernel::{bindings, c_str, of, platform}; +/// +/// struct MyDriver; +/// +/// kernel::of_device_table!( +/// OF_TABLE, +/// MODULE_OF_TABLE, +/// <MyDriver as platform::Driver>::IdInfo, +/// [ +/// (of::DeviceId::new(c_str!("test,device")), ()) +/// ] +/// ); +/// +/// impl platform::Driver for MyDriver { +/// type IdInfo = (); +/// const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); +/// +/// fn probe( +/// _pdev: &mut platform::Device, +/// _id_info: Option<&Self::IdInfo>, +/// ) -> Result<Pin<KBox<Self>>> { +/// Err(ENODEV) +/// } +/// } +///``` +pub trait Driver { + /// The type holding driver private data about each device id supported by the driver. + /// + /// TODO: Use associated_type_defaults once stabilized: + /// + /// type IdInfo: 'static = (); + type IdInfo: 'static; + + /// The table of OF device ids supported by the driver. + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>>; + + /// Platform driver probe. + /// + /// Called when a new platform device is added or discovered. + /// Implementers should attempt to initialize the device here. + fn probe(dev: &mut Device, id_info: Option<&Self::IdInfo>) -> Result<Pin<KBox<Self>>>; +} + +/// The platform device representation. +/// +/// A platform device is based on an always reference counted `device:Device` instance. Cloning a +/// platform device, hence, also increments the base device' reference count. +/// +/// # Invariants +/// +/// `Device` holds a valid reference of `ARef<device::Device>` whose underlying `struct device` is a +/// member of a `struct platform_device`. 
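Since `OF_ID_TABLE` above is an `Option`, a platform driver can also be written without any OF match table; registration then installs a null `of_match_table` and `probe()` simply receives `None` for the id info (the device can still be bound through the platform core's other matching rules, such as the driver name). A minimal sketch of that variant, with made-up driver and module names:

```
use kernel::{of, platform, prelude::*};

struct NoOfDriver;

impl platform::Driver for NoOfDriver {
    type IdInfo = ();

    // No OF match table: rely on the platform core's other matching (e.g. by name).
    const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = None;

    fn probe(
        pdev: &mut platform::Device,
        id_info: Option<&Self::IdInfo>,
    ) -> Result<Pin<KBox<Self>>> {
        // Without an OF table there is no per-device id info to receive.
        if id_info.is_none() {
            dev_dbg!(pdev.as_ref(), "probed without an OF match\n");
        }

        Ok(KBox::new(NoOfDriver, GFP_KERNEL)?.into())
    }
}

kernel::module_platform_driver! {
    type: NoOfDriver,
    name: "no_of_platform_driver",
    author: "Author name",
    description: "Platform driver without an OF id table",
    license: "GPL v2",
}
```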
+#[derive(Clone)] +pub struct Device(ARef<device::Device>); + +impl Device { + /// Convert a raw kernel device into a `Device` + /// + /// # Safety + /// + /// `dev` must be an `Aref<device::Device>` whose underlying `bindings::device` is a member of a + /// `bindings::platform_device`. + unsafe fn from_dev(dev: ARef<device::Device>) -> Self { + Self(dev) + } + + fn as_raw(&self) -> *mut bindings::platform_device { + // SAFETY: By the type invariant `self.0.as_raw` is a pointer to the `struct device` + // embedded in `struct platform_device`. + unsafe { container_of!(self.0.as_raw(), bindings::platform_device, dev) }.cast_mut() + } +} + +impl AsRef<device::Device> for Device { + fn as_ref(&self) -> &device::Device { + &self.0 + } +} diff --git a/rust/kernel/revocable.rs b/rust/kernel/revocable.rs new file mode 100644 index 000000000000..1e5a9d25c21b --- /dev/null +++ b/rust/kernel/revocable.rs @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Revocable objects. +//! +//! The [`Revocable`] type wraps other types and allows access to them to be revoked. The existence +//! of a [`RevocableGuard`] ensures that objects remain valid. + +use crate::{bindings, prelude::*, sync::rcu, types::Opaque}; +use core::{ + marker::PhantomData, + ops::Deref, + ptr::drop_in_place, + sync::atomic::{AtomicBool, Ordering}, +}; + +/// An object that can become inaccessible at runtime. +/// +/// Once access is revoked and all concurrent users complete (i.e., all existing instances of +/// [`RevocableGuard`] are dropped), the wrapped object is also dropped. +/// +/// # Examples +/// +/// ``` +/// # use kernel::revocable::Revocable; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn add_two(v: &Revocable<Example>) -> Option<u32> { +/// let guard = v.try_access()?; +/// Some(guard.a + guard.b) +/// } +/// +/// let v = KBox::pin_init(Revocable::new(Example { a: 10, b: 20 }), GFP_KERNEL).unwrap(); +/// assert_eq!(add_two(&v), Some(30)); +/// v.revoke(); +/// assert_eq!(add_two(&v), None); +/// ``` +/// +/// Sample example as above, but explicitly using the rcu read side lock. +/// +/// ``` +/// # use kernel::revocable::Revocable; +/// use kernel::sync::rcu; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn add_two(v: &Revocable<Example>) -> Option<u32> { +/// let guard = rcu::read_lock(); +/// let e = v.try_access_with_guard(&guard)?; +/// Some(e.a + e.b) +/// } +/// +/// let v = KBox::pin_init(Revocable::new(Example { a: 10, b: 20 }), GFP_KERNEL).unwrap(); +/// assert_eq!(add_two(&v), Some(30)); +/// v.revoke(); +/// assert_eq!(add_two(&v), None); +/// ``` +#[pin_data(PinnedDrop)] +pub struct Revocable<T> { + is_available: AtomicBool, + #[pin] + data: Opaque<T>, +} + +// SAFETY: `Revocable` is `Send` if the wrapped object is also `Send`. This is because while the +// functionality exposed by `Revocable` can be accessed from any thread/CPU, it is possible that +// this isn't supported by the wrapped object. +unsafe impl<T: Send> Send for Revocable<T> {} + +// SAFETY: `Revocable` is `Sync` if the wrapped object is both `Send` and `Sync`. We require `Send` +// from the wrapped object as well because of `Revocable::revoke`, which can trigger the `Drop` +// implementation of the wrapped object from an arbitrary thread. +unsafe impl<T: Sync + Send> Sync for Revocable<T> {} + +impl<T> Revocable<T> { + /// Creates a new revocable instance of the given data. 
+ pub fn new(data: impl PinInit<T>) -> impl PinInit<Self> { + pin_init!(Self { + is_available: AtomicBool::new(true), + data <- Opaque::pin_init(data), + }) + } + + /// Tries to access the revocable wrapped object. + /// + /// Returns `None` if the object has been revoked and is therefore no longer accessible. + /// + /// Returns a guard that gives access to the object otherwise; the object is guaranteed to + /// remain accessible while the guard is alive. In such cases, callers are not allowed to sleep + /// because another CPU may be waiting to complete the revocation of this object. + pub fn try_access(&self) -> Option<RevocableGuard<'_, T>> { + let guard = rcu::read_lock(); + if self.is_available.load(Ordering::Relaxed) { + // Since `self.is_available` is true, data is initialised and has to remain valid + // because the RCU read side lock prevents it from being dropped. + Some(RevocableGuard::new(self.data.get(), guard)) + } else { + None + } + } + + /// Tries to access the revocable wrapped object. + /// + /// Returns `None` if the object has been revoked and is therefore no longer accessible. + /// + /// Returns a shared reference to the object otherwise; the object is guaranteed to + /// remain accessible while the rcu read side guard is alive. In such cases, callers are not + /// allowed to sleep because another CPU may be waiting to complete the revocation of this + /// object. + pub fn try_access_with_guard<'a>(&'a self, _guard: &'a rcu::Guard) -> Option<&'a T> { + if self.is_available.load(Ordering::Relaxed) { + // SAFETY: Since `self.is_available` is true, data is initialised and has to remain + // valid because the RCU read side lock prevents it from being dropped. + Some(unsafe { &*self.data.get() }) + } else { + None + } + } + + /// # Safety + /// + /// Callers must ensure that there are no more concurrent users of the revocable object. + unsafe fn revoke_internal<const SYNC: bool>(&self) { + if self.is_available.swap(false, Ordering::Relaxed) { + if SYNC { + // SAFETY: Just an FFI call, there are no further requirements. + unsafe { bindings::synchronize_rcu() }; + } + + // SAFETY: We know `self.data` is valid because only one CPU can succeed the + // `compare_exchange` above that takes `is_available` from `true` to `false`. + unsafe { drop_in_place(self.data.get()) }; + } + } + + /// Revokes access to and drops the wrapped object. + /// + /// Access to the object is revoked immediately to new callers of [`Revocable::try_access`], + /// expecting that there are no concurrent users of the object. + /// + /// # Safety + /// + /// Callers must ensure that there are no more concurrent users of the revocable object. + pub unsafe fn revoke_nosync(&self) { + // SAFETY: By the safety requirement of this function, the caller ensures that nobody is + // accessing the data anymore and hence we don't have to wait for the grace period to + // finish. + unsafe { self.revoke_internal::<false>() } + } + + /// Revokes access to and drops the wrapped object. + /// + /// Access to the object is revoked immediately to new callers of [`Revocable::try_access`]. + /// + /// If there are concurrent users of the object (i.e., ones that called + /// [`Revocable::try_access`] beforehand and still haven't dropped the returned guard), this + /// function waits for the concurrent access to complete before dropping the wrapped object. + pub fn revoke(&self) { + // SAFETY: By passing `true` we ask `revoke_internal` to wait for the grace period to + // finish. 
+ unsafe { self.revoke_internal::<true>() } + } +} + +#[pinned_drop] +impl<T> PinnedDrop for Revocable<T> { + fn drop(self: Pin<&mut Self>) { + // Drop only if the data hasn't been revoked yet (in which case it has already been + // dropped). + // SAFETY: We are not moving out of `p`, only dropping in place + let p = unsafe { self.get_unchecked_mut() }; + if *p.is_available.get_mut() { + // SAFETY: We know `self.data` is valid because no other CPU has changed + // `is_available` to `false` yet, and no other CPU can do it anymore because this CPU + // holds the only reference (mutable) to `self` now. + unsafe { drop_in_place(p.data.get()) }; + } + } +} + +/// A guard that allows access to a revocable object and keeps it alive. +/// +/// CPUs may not sleep while holding on to [`RevocableGuard`] because it's in atomic context +/// holding the RCU read-side lock. +/// +/// # Invariants +/// +/// The RCU read-side lock is held while the guard is alive. +pub struct RevocableGuard<'a, T> { + data_ref: *const T, + _rcu_guard: rcu::Guard, + _p: PhantomData<&'a ()>, +} + +impl<T> RevocableGuard<'_, T> { + fn new(data_ref: *const T, rcu_guard: rcu::Guard) -> Self { + Self { + data_ref, + _rcu_guard: rcu_guard, + _p: PhantomData, + } + } +} + +impl<T> Deref for RevocableGuard<'_, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: By the type invariants, we hold the rcu read-side lock, so the object is + // guaranteed to remain valid. + unsafe { &*self.data_ref } + } +} diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index dffdaad972ce..3498fb344dc9 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -12,6 +12,7 @@ mod condvar; pub mod lock; mod locked_by; pub mod poll; +pub mod rcu; pub use arc::{Arc, ArcBorrow, UniqueArc}; pub use condvar::{new_condvar, CondVar, CondVarTimeoutResult}; diff --git a/rust/kernel/sync/rcu.rs b/rust/kernel/sync/rcu.rs new file mode 100644 index 000000000000..b51d9150ffe2 --- /dev/null +++ b/rust/kernel/sync/rcu.rs @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! RCU support. +//! +//! C header: [`include/linux/rcupdate.h`](srctree/include/linux/rcupdate.h) + +use crate::{bindings, types::NotThreadSafe}; + +/// Evidence that the RCU read side lock is held on the current thread/CPU. +/// +/// The type is explicitly not `Send` because this property is per-thread/CPU. +/// +/// # Invariants +/// +/// The RCU read side lock is actually held while instances of this guard exist. +pub struct Guard(NotThreadSafe); + +impl Guard { + /// Acquires the RCU read side lock and returns a guard. + pub fn new() -> Self { + // SAFETY: An FFI call with no additional requirements. + unsafe { bindings::rcu_read_lock() }; + // INVARIANT: The RCU read side lock was just acquired above. + Self(NotThreadSafe) + } + + /// Explicitly releases the RCU read side lock. + pub fn unlock(self) {} +} + +impl Default for Guard { + fn default() -> Self { + Self::new() + } +} + +impl Drop for Guard { + fn drop(&mut self) { + // SAFETY: By the type invariants, the RCU read side is locked, so it is ok to unlock it. + unsafe { bindings::rcu_read_unlock() }; + } +} + +/// Acquires the RCU read side lock. +pub fn read_lock() -> Guard { + Guard::new() +} diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 0dfaf45a755c..2bbaab83b9d6 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -326,6 +326,17 @@ impl<T> Opaque<T> { } } + /// Create an opaque pin-initializer from the given pin-initializer. 
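The `rcu::Guard` added above is exactly what `Revocable::try_access()` stores inside its returned `RevocableGuard`; taking the guard explicitly is useful when several revocable objects should be read under one read-side critical section, and `unlock()` ends that section early. A small sketch, assuming two hypothetical `Revocable<u32>` values:

```
use kernel::revocable::Revocable;
use kernel::sync::rcu;

/// Reads both values under a single RCU read-side critical section.
fn sum(a: &Revocable<u32>, b: &Revocable<u32>) -> Option<u32> {
    let guard = rcu::read_lock();

    let x = *a.try_access_with_guard(&guard)?;
    let y = *b.try_access_with_guard(&guard)?;

    // Release the read-side lock explicitly; sleeping is not allowed while it is held.
    guard.unlock();

    Some(x + y)
}
```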
+ pub fn pin_init(slot: impl PinInit<T>) -> impl PinInit<Self> { + Self::ffi_init(|ptr: *mut T| { + // SAFETY: + // - `ptr` is a valid pointer to uninitialized memory, + // - `slot` is not accessed on error; the call is infallible, + // - `slot` is pinned in memory. + let _ = unsafe { init::PinInit::<T>::__pinned_init(slot, ptr) }; + }) + } + /// Creates a pin-initializer from the given initializer closure. /// /// The returned initializer calls the given closure with the pointer to the inner `T` of this diff --git a/rust/macros/module.rs b/rust/macros/module.rs index 2587f41b0d39..cdf94f4982df 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -228,6 +228,10 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { kernel::ThisModule::from_ptr(core::ptr::null_mut()) }}; + impl kernel::ModuleMetadata for {type_} {{ + const NAME: &'static kernel::str::CStr = kernel::c_str!(\"{name}\"); + }} + // Double nested modules, since then nobody can access the public items inside. mod __module_init {{ mod __module_init {{ diff --git a/samples/check-exec/inc.c b/samples/check-exec/inc.c index 94b87569d2a2..7f6ef06a2f06 100644 --- a/samples/check-exec/inc.c +++ b/samples/check-exec/inc.c @@ -21,8 +21,15 @@ #include <stdlib.h> #include <string.h> #include <sys/prctl.h> +#include <sys/syscall.h> #include <unistd.h> +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + /* Returns 1 on error, 0 otherwise. */ static int interpret_buffer(char *buffer, size_t buffer_size) { @@ -78,8 +85,8 @@ static int interpret_stream(FILE *script, char *const script_name, * script execution. We must use the script file descriptor instead of * the script path name to avoid race conditions. */ - err = execveat(fileno(script), "", script_argv, envp, - AT_EMPTY_PATH | AT_EXECVE_CHECK); + err = sys_execveat(fileno(script), "", script_argv, envp, + AT_EMPTY_PATH | AT_EXECVE_CHECK); if (err && restrict_stream) { perror("ERROR: Script execution check"); return 1; diff --git a/samples/rust/Kconfig b/samples/rust/Kconfig index b0f74a81c8f9..918dbead2c0b 100644 --- a/samples/rust/Kconfig +++ b/samples/rust/Kconfig @@ -20,6 +20,16 @@ config SAMPLE_RUST_MINIMAL If unsure, say N. +config SAMPLE_RUST_MISC_DEVICE + tristate "Misc device" + help + This option builds the Rust misc device. + + To compile this as a module, choose M here: + the module will be called rust_misc_device. + + If unsure, say N. + config SAMPLE_RUST_PRINT tristate "Printing macros" help @@ -30,6 +40,27 @@ config SAMPLE_RUST_PRINT If unsure, say N. +config SAMPLE_RUST_DRIVER_PCI + tristate "PCI Driver" + depends on PCI + help + This option builds the Rust PCI driver sample. + + To compile this as a module, choose M here: + the module will be called driver_pci. + + If unsure, say N. + +config SAMPLE_RUST_DRIVER_PLATFORM + tristate "Platform Driver" + help + This option builds the Rust Platform driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_platform. + + If unsure, say N. 
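As an aside on the `rust/macros/module.rs` change above: every `module!` invocation (including the ones generated by `module_pci_driver!` and `module_platform_driver!`) now also emits a `ModuleMetadata` impl, making the module name available as a typed constant. Using the misc-device sample's names from later in this series, the expansion is roughly equivalent to the following hand-written sketch:

```
// What `module! { type: RustMiscDeviceModule, name: "rust_misc_device", ... }`
// now additionally generates (hand-expanded here for illustration):
impl kernel::ModuleMetadata for RustMiscDeviceModule {
    const NAME: &'static kernel::str::CStr = kernel::c_str!("rust_misc_device");
}

// Other kernel code can then refer to the module name without re-spelling the string:
// <RustMiscDeviceModule as kernel::ModuleMetadata>::NAME
```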
+ config SAMPLE_RUST_HOSTPROGS bool "Host programs" help diff --git a/samples/rust/Makefile b/samples/rust/Makefile index c1a5c1655395..5a8ab0df0567 100644 --- a/samples/rust/Makefile +++ b/samples/rust/Makefile @@ -2,7 +2,10 @@ ccflags-y += -I$(src) # needed for trace events obj-$(CONFIG_SAMPLE_RUST_MINIMAL) += rust_minimal.o +obj-$(CONFIG_SAMPLE_RUST_MISC_DEVICE) += rust_misc_device.o obj-$(CONFIG_SAMPLE_RUST_PRINT) += rust_print.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_PCI) += rust_driver_pci.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_PLATFORM) += rust_driver_platform.o rust_print-y := rust_print_main.o rust_print_events.o diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs new file mode 100644 index 000000000000..1fb6e44f3395 --- /dev/null +++ b/samples/rust/rust_driver_pci.rs @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust PCI driver sample (based on QEMU's `pci-testdev`). +//! +//! To make this driver probe, QEMU must be run with `-device pci-testdev`. + +use kernel::{bindings, c_str, devres::Devres, pci, prelude::*}; + +struct Regs; + +impl Regs { + const TEST: usize = 0x0; + const OFFSET: usize = 0x4; + const DATA: usize = 0x8; + const COUNT: usize = 0xC; + const END: usize = 0x10; +} + +type Bar0 = pci::Bar<{ Regs::END }>; + +#[derive(Debug)] +struct TestIndex(u8); + +impl TestIndex { + const NO_EVENTFD: Self = Self(0); +} + +struct SampleDriver { + pdev: pci::Device, + bar: Devres<Bar0>, +} + +kernel::pci_device_table!( + PCI_TABLE, + MODULE_PCI_TABLE, + <SampleDriver as pci::Driver>::IdInfo, + [( + pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, 0x5), + TestIndex::NO_EVENTFD + )] +); + +impl SampleDriver { + fn testdev(index: &TestIndex, bar: &Bar0) -> Result<u32> { + // Select the test. + bar.writeb(index.0, Regs::TEST); + + let offset = u32::from_le(bar.readl(Regs::OFFSET)) as usize; + let data = bar.readb(Regs::DATA); + + // Write `data` to `offset` to increase `count` by one. + // + // Note that we need `try_writeb`, since `offset` can't be checked at compile-time. + bar.try_writeb(data, offset)?; + + Ok(bar.readl(Regs::COUNT)) + } +} + +impl pci::Driver for SampleDriver { + type IdInfo = TestIndex; + + const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE; + + fn probe(pdev: &mut pci::Device, info: &Self::IdInfo) -> Result<Pin<KBox<Self>>> { + dev_dbg!( + pdev.as_ref(), + "Probe Rust PCI driver sample (PCI ID: 0x{:x}, 0x{:x}).\n", + pdev.vendor_id(), + pdev.device_id() + ); + + pdev.enable_device_mem()?; + pdev.set_master(); + + let bar = pdev.iomap_region_sized::<{ Regs::END }>(0, c_str!("rust_driver_pci"))?; + + let drvdata = KBox::new( + Self { + pdev: pdev.clone(), + bar, + }, + GFP_KERNEL, + )?; + + let bar = drvdata.bar.try_access().ok_or(ENXIO)?; + + dev_info!( + pdev.as_ref(), + "pci-testdev data-match count: {}\n", + Self::testdev(info, &bar)? + ); + + Ok(drvdata.into()) + } +} + +impl Drop for SampleDriver { + fn drop(&mut self) { + dev_dbg!(self.pdev.as_ref(), "Remove Rust PCI driver sample.\n"); + } +} + +kernel::module_pci_driver! { + type: SampleDriver, + name: "rust_driver_pci", + author: "Danilo Krummrich", + description: "Rust PCI driver", + license: "GPL v2", +} diff --git a/samples/rust/rust_driver_platform.rs b/samples/rust/rust_driver_platform.rs new file mode 100644 index 000000000000..8120609e2940 --- /dev/null +++ b/samples/rust/rust_driver_platform.rs @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust Platform driver sample. 
+ +use kernel::{c_str, of, platform, prelude::*}; + +struct SampleDriver { + pdev: platform::Device, +} + +struct Info(u32); + +kernel::of_device_table!( + OF_TABLE, + MODULE_OF_TABLE, + <SampleDriver as platform::Driver>::IdInfo, + [(of::DeviceId::new(c_str!("test,rust-device")), Info(42))] +); + +impl platform::Driver for SampleDriver { + type IdInfo = Info; + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); + + fn probe(pdev: &mut platform::Device, info: Option<&Self::IdInfo>) -> Result<Pin<KBox<Self>>> { + dev_dbg!(pdev.as_ref(), "Probe Rust Platform driver sample.\n"); + + if let Some(info) = info { + dev_info!(pdev.as_ref(), "Probed with info: '{}'.\n", info.0); + } + + let drvdata = KBox::new(Self { pdev: pdev.clone() }, GFP_KERNEL)?; + + Ok(drvdata.into()) + } +} + +impl Drop for SampleDriver { + fn drop(&mut self) { + dev_dbg!(self.pdev.as_ref(), "Remove Rust Platform driver sample.\n"); + } +} + +kernel::module_platform_driver! { + type: SampleDriver, + name: "rust_driver_platform", + author: "Danilo Krummrich", + description: "Rust Platform driver", + license: "GPL v2", +} diff --git a/samples/rust/rust_misc_device.rs b/samples/rust/rust_misc_device.rs new file mode 100644 index 000000000000..40ad7266c225 --- /dev/null +++ b/samples/rust/rust_misc_device.rs @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Rust misc device sample. + +/// Below is an example userspace C program that exercises this sample's functionality. +/// +/// ```c +/// #include <stdio.h> +/// #include <stdlib.h> +/// #include <errno.h> +/// #include <fcntl.h> +/// #include <unistd.h> +/// #include <sys/ioctl.h> +/// +/// #define RUST_MISC_DEV_FAIL _IO('|', 0) +/// #define RUST_MISC_DEV_HELLO _IO('|', 0x80) +/// #define RUST_MISC_DEV_GET_VALUE _IOR('|', 0x81, int) +/// #define RUST_MISC_DEV_SET_VALUE _IOW('|', 0x82, int) +/// +/// int main() { +/// int value, new_value; +/// int fd, ret; +/// +/// // Open the device file +/// printf("Opening /dev/rust-misc-device for reading and writing\n"); +/// fd = open("/dev/rust-misc-device", O_RDWR); +/// if (fd < 0) { +/// perror("open"); +/// return errno; +/// } +/// +/// // Make call into driver to say "hello" +/// printf("Calling Hello\n"); +/// ret = ioctl(fd, RUST_MISC_DEV_HELLO, NULL); +/// if (ret < 0) { +/// perror("ioctl: Failed to call into Hello"); +/// close(fd); +/// return errno; +/// } +/// +/// // Get initial value +/// printf("Fetching initial value\n"); +/// ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &value); +/// if (ret < 0) { +/// perror("ioctl: Failed to fetch the initial value"); +/// close(fd); +/// return errno; +/// } +/// +/// value++; +/// +/// // Set value to something different +/// printf("Submitting new value (%d)\n", value); +/// ret = ioctl(fd, RUST_MISC_DEV_SET_VALUE, &value); +/// if (ret < 0) { +/// perror("ioctl: Failed to submit new value"); +/// close(fd); +/// return errno; +/// } +/// +/// // Ensure new value was applied +/// printf("Fetching new value\n"); +/// ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &new_value); +/// if (ret < 0) { +/// perror("ioctl: Failed to fetch the new value"); +/// close(fd); +/// return errno; +/// } +/// +/// if (value != new_value) { +/// printf("Failed: Committed and retrieved values are different (%d - %d)\n", value, new_value); +/// close(fd); +/// return -1; +/// } +/// +/// // Call the unsuccessful ioctl +/// printf("Attempting to call in to an non-existent IOCTL\n"); +/// ret = ioctl(fd, RUST_MISC_DEV_FAIL, NULL); +/// if 
(ret < 0) { +/// perror("ioctl: Succeeded to fail - this was expected"); +/// } else { +/// printf("ioctl: Failed to fail\n"); +/// close(fd); +/// return -1; +/// } +/// +/// // Close the device file +/// printf("Closing /dev/rust-misc-device\n"); +/// close(fd); +/// +/// printf("Success\n"); +/// return 0; +/// } +/// ``` +use core::pin::Pin; + +use kernel::{ + c_str, + device::Device, + fs::File, + ioctl::{_IO, _IOC_SIZE, _IOR, _IOW}, + miscdevice::{MiscDevice, MiscDeviceOptions, MiscDeviceRegistration}, + new_mutex, + prelude::*, + sync::Mutex, + types::ARef, + uaccess::{UserSlice, UserSliceReader, UserSliceWriter}, +}; + +const RUST_MISC_DEV_HELLO: u32 = _IO('|' as u32, 0x80); +const RUST_MISC_DEV_GET_VALUE: u32 = _IOR::<i32>('|' as u32, 0x81); +const RUST_MISC_DEV_SET_VALUE: u32 = _IOW::<i32>('|' as u32, 0x82); + +module! { + type: RustMiscDeviceModule, + name: "rust_misc_device", + author: "Lee Jones", + description: "Rust misc device sample", + license: "GPL", +} + +#[pin_data] +struct RustMiscDeviceModule { + #[pin] + _miscdev: MiscDeviceRegistration<RustMiscDevice>, +} + +impl kernel::InPlaceModule for RustMiscDeviceModule { + fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> { + pr_info!("Initialising Rust Misc Device Sample\n"); + + let options = MiscDeviceOptions { + name: c_str!("rust-misc-device"), + }; + + try_pin_init!(Self { + _miscdev <- MiscDeviceRegistration::register(options), + }) + } +} + +struct Inner { + value: i32, +} + +#[pin_data(PinnedDrop)] +struct RustMiscDevice { + #[pin] + inner: Mutex<Inner>, + dev: ARef<Device>, +} + +#[vtable] +impl MiscDevice for RustMiscDevice { + type Ptr = Pin<KBox<Self>>; + + fn open(_file: &File, misc: &MiscDeviceRegistration<Self>) -> Result<Pin<KBox<Self>>> { + let dev = ARef::from(misc.device()); + + dev_info!(dev, "Opening Rust Misc Device Sample\n"); + + KBox::try_pin_init( + try_pin_init! 
{ + RustMiscDevice { + inner <- new_mutex!( Inner{ value: 0_i32 } ), + dev: dev, + } + }, + GFP_KERNEL, + ) + } + + fn ioctl(me: Pin<&RustMiscDevice>, _file: &File, cmd: u32, arg: usize) -> Result<isize> { + dev_info!(me.dev, "IOCTLing Rust Misc Device Sample\n"); + + let size = _IOC_SIZE(cmd); + + match cmd { + RUST_MISC_DEV_GET_VALUE => me.get_value(UserSlice::new(arg, size).writer())?, + RUST_MISC_DEV_SET_VALUE => me.set_value(UserSlice::new(arg, size).reader())?, + RUST_MISC_DEV_HELLO => me.hello()?, + _ => { + dev_err!(me.dev, "-> IOCTL not recognised: {}\n", cmd); + return Err(ENOTTY); + } + }; + + Ok(0) + } +} + +#[pinned_drop] +impl PinnedDrop for RustMiscDevice { + fn drop(self: Pin<&mut Self>) { + dev_info!(self.dev, "Exiting the Rust Misc Device Sample\n"); + } +} + +impl RustMiscDevice { + fn set_value(&self, mut reader: UserSliceReader) -> Result<isize> { + let new_value = reader.read::<i32>()?; + let mut guard = self.inner.lock(); + + dev_info!( + self.dev, + "-> Copying data from userspace (value: {})\n", + new_value + ); + + guard.value = new_value; + Ok(0) + } + + fn get_value(&self, mut writer: UserSliceWriter) -> Result<isize> { + let guard = self.inner.lock(); + let value = guard.value; + + // Free-up the lock and use our locally cached instance from here + drop(guard); + + dev_info!( + self.dev, + "-> Copying data to userspace (value: {})\n", + &value + ); + + writer.write::<i32>(&value)?; + Ok(0) + } + + fn hello(&self) -> Result<isize> { + dev_info!(self.dev, "-> Hello from the Rust Misc Device\n"); + + Ok(0) + } +} diff --git a/scripts/Makefile b/scripts/Makefile index 546e8175e1c4..46f860529df5 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -53,7 +53,8 @@ hostprogs += unifdef gen_packed_field_checks targets += module.lds subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins -subdir-$(CONFIG_MODVERSIONS) += genksyms +subdir-$(CONFIG_GENKSYMS) += genksyms +subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms subdir-$(CONFIG_SECURITY_SELINUX) += selinux subdir-$(CONFIG_SECURITY_IPE) += ipe diff --git a/scripts/Makefile.build b/scripts/Makefile.build index c16e4cf54d77..993708d11874 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -107,13 +107,24 @@ cmd_cpp_i_c = $(CPP) $(c_flags) -o $@ $< $(obj)/%.i: $(obj)/%.c FORCE $(call if_changed_dep,cpp_i_c) +getexportsymbols = $(NM) $@ | sed -n 's/.* __export_symbol_\(.*\)/$(1)/p' + +gendwarfksyms = $(objtree)/scripts/gendwarfksyms/gendwarfksyms \ + $(if $(KBUILD_SYMTYPES), --symtypes $(@:.o=.symtypes)) \ + $(if $(KBUILD_GENDWARFKSYMS_STABLE), --stable) + genksyms = $(objtree)/scripts/genksyms/genksyms \ $(if $(KBUILD_SYMTYPES), -T $(@:.o=.symtypes)) \ $(if $(KBUILD_PRESERVE), -p) \ $(addprefix -r , $(wildcard $(@:.o=.symref))) # These mirror gensymtypes_S and co below, keep them in synch. 
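A note on the misc-device sample's `ioctl()` handler above: the userspace buffer size is not hard-coded, it is decoded from the command number via `_IOC_SIZE(cmd)` and passed to `UserSlice::new()`. A small illustrative sketch (the compile-time assertions are not in the sample) showing that both value commands encode a 4-byte payload, matching the `int` used by the userspace program in the doc comment:

```
use kernel::ioctl::{_IOC_SIZE, _IOR, _IOW};

const RUST_MISC_DEV_GET_VALUE: u32 = _IOR::<i32>('|' as u32, 0x81);
const RUST_MISC_DEV_SET_VALUE: u32 = _IOW::<i32>('|' as u32, 0x82);

// Both commands carry an i32 payload, so the handler's
// `UserSlice::new(arg, _IOC_SIZE(cmd))` covers exactly one i32.
const _: () = assert!(_IOC_SIZE(RUST_MISC_DEV_GET_VALUE) == core::mem::size_of::<i32>());
const _: () = assert!(_IOC_SIZE(RUST_MISC_DEV_SET_VALUE) == core::mem::size_of::<i32>());
```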
+ifdef CONFIG_GENDWARFKSYMS +cmd_gensymtypes_c = $(if $(skip_gendwarfksyms),, \ + $(call getexportsymbols,\1) | $(gendwarfksyms) $@) +else cmd_gensymtypes_c = $(CPP) -D__GENKSYMS__ $(c_flags) $< | $(genksyms) +endif # CONFIG_GENDWARFKSYMS # LLVM assembly # Generate .ll files from .c @@ -183,7 +194,9 @@ endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT is-standard-object = $(if $(filter-out y%, $(OBJECT_FILES_NON_STANDARD_$(target-stem).o)$(OBJECT_FILES_NON_STANDARD)n),$(is-kernel-object)) +ifdef CONFIG_OBJTOOL $(obj)/%.o: private objtool-enabled = $(if $(is-standard-object),$(if $(delay-objtool),$(is-single-obj-m),y)) +endif ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),) cmd_warn_shared_object = $(if $(word 2, $(modname-multi)),$(warning $(kbuild-file): $*.o is added to multiple modules: $(modname-multi))) @@ -286,14 +299,26 @@ $(obj)/%.rs: $(obj)/%.rs.S FORCE # This is convoluted. The .S file must first be preprocessed to run guards and # expand names, then the resulting exports must be constructed into plain # EXPORT_SYMBOL(symbol); to build our dummy C file, and that gets preprocessed -# to make the genksyms input. +# to make the genksyms input or compiled into an object for gendwarfksyms. # # These mirror gensymtypes_c and co above, keep them in synch. -cmd_gensymtypes_S = \ - { echo "\#include <linux/kernel.h>" ; \ - echo "\#include <asm/asm-prototypes.h>" ; \ - $(NM) $@ | sed -n 's/.* __export_symbol_\(.*\)/EXPORT_SYMBOL(\1);/p' ; } | \ - $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | $(genksyms) +getasmexports = \ + { echo "\#include <linux/kernel.h>" ; \ + echo "\#include <linux/string.h>" ; \ + echo "\#include <asm/asm-prototypes.h>" ; \ + $(call getexportsymbols,EXPORT_SYMBOL(\1);) ; } + +ifdef CONFIG_GENDWARFKSYMS +cmd_gensymtypes_S = \ + $(getasmexports) | \ + $(CC) $(c_flags) -c -o $(@:.o=.gendwarfksyms.o) -xc -; \ + $(call getexportsymbols,\1) | \ + $(gendwarfksyms) $(@:.o=.gendwarfksyms.o) +else +cmd_gensymtypes_S = \ + $(getasmexports) | \ + $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | $(genksyms) +endif # CONFIG_GENDWARFKSYMS quiet_cmd_cpp_s_S = CPP $(quiet_modtag) $@ cmd_cpp_s_S = $(CPP) $(a_flags) -o $@ $< diff --git a/scripts/Makefile.defconf b/scripts/Makefile.defconf index 226ea3df3b4b..a44307f08e9d 100644 --- a/scripts/Makefile.defconf +++ b/scripts/Makefile.defconf @@ -1,6 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 # Configuration heplers +cmd_merge_fragments = \ + $(srctree)/scripts/kconfig/merge_config.sh \ + $4 -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$2 \ + $(foreach config,$3,$(srctree)/arch/$(SRCARCH)/configs/$(config).config) + # Creates 'merged defconfigs' # --------------------------------------------------------------------------- # Usage: @@ -8,9 +13,7 @@ # # Input config fragments without '.config' suffix define merge_into_defconfig - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) + $(call cmd,merge_fragments,$1,$2) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef @@ -22,8 +25,6 @@ endef # # Input config fragments without '.config' suffix define merge_into_defconfig_override - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -Q -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) + $(call cmd,merge_fragments,$1,$2,-Q) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef diff --git 
a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 1d13cecc7cc7..eb719f6d8d53 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -77,6 +77,9 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # Warn if there is an enum types mismatch KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) +# Explicitly clear padding bits during variable initialization +KBUILD_CFLAGS += $(call cc-option,-fzero-init-padding-bits=all) + KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wunused diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 7395200538da..ad55ef201aac 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -287,6 +287,8 @@ delay-objtool := $(or $(CONFIG_LTO_CLANG),$(CONFIG_X86_KERNEL_IBT)) cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool-args) $@) cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) +objtool-enabled := y + endif # CONFIG_OBJTOOL # Useful for describing the dependency of composite objects @@ -302,11 +304,11 @@ endef # =========================================================================== # These are shared by some Makefile.* files. -objtool-enabled := y - ifdef CONFIG_LTO_CLANG -# objtool cannot process LLVM IR. Make $(LD) covert LLVM IR to ELF here. -cmd_ld_single = $(if $(objtool-enabled), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) +# Run $(LD) here to covert LLVM IR to ELF in the following cases: +# - when this object needs objtool processing, as objtool cannot process LLVM IR +# - when this is a single-object module, as modpost cannot process LLVM IR +cmd_ld_single = $(if $(objtool-enabled)$(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) endif quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ @@ -374,6 +376,9 @@ quiet_cmd_ar = AR $@ quiet_cmd_objcopy = OBJCOPY $@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ +quiet_cmd_strip_relocs = RSTRIP $@ +cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' $@ + # Gzip # --------------------------------------------------------------------------- diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index f97c9926ed31..1628198f3e83 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -105,7 +105,7 @@ else sig-key := $(CONFIG_MODULE_SIG_KEY) endif quiet_cmd_sign = SIGN $@ - cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" certs/signing_key.x509 $@ \ + cmd_sign = $(objtree)/scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" $(objtree)/certs/signing_key.x509 $@ \ $(if $(KBUILD_EXTMOD),|| true) ifeq ($(sign-only),) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index ab0e94ea6249..d7d45067d08b 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -43,6 +43,8 @@ MODPOST = $(objtree)/scripts/mod/modpost modpost-args = \ $(if $(CONFIG_MODULES),-M) \ $(if $(CONFIG_MODVERSIONS),-m) \ + $(if $(CONFIG_BASIC_MODVERSIONS),-b) \ + $(if $(CONFIG_EXTENDED_MODVERSIONS),-x) \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ $(if $(KBUILD_MODPOST_WARN),-w) \ diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore new file mode 100644 index 000000000000..0927f8d3cd96 --- /dev/null +++ b/scripts/gendwarfksyms/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +/gendwarfksyms diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile new file mode 100644 
index 000000000000..6334c7d3c4d5 --- /dev/null +++ b/scripts/gendwarfksyms/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +hostprogs-always-y += gendwarfksyms + +gendwarfksyms-objs += gendwarfksyms.o +gendwarfksyms-objs += cache.o +gendwarfksyms-objs += die.o +gendwarfksyms-objs += dwarf.o +gendwarfksyms-objs += kabi.o +gendwarfksyms-objs += symbols.o +gendwarfksyms-objs += types.o + +HOSTLDLIBS_gendwarfksyms := -ldw -lelf -lz diff --git a/scripts/gendwarfksyms/cache.c b/scripts/gendwarfksyms/cache.c new file mode 100644 index 000000000000..c9c19b86a686 --- /dev/null +++ b/scripts/gendwarfksyms/cache.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include "gendwarfksyms.h" + +struct cache_item { + unsigned long key; + int value; + struct hlist_node hash; +}; + +void cache_set(struct cache *cache, unsigned long key, int value) +{ + struct cache_item *ci; + + ci = xmalloc(sizeof(struct cache_item)); + ci->key = key; + ci->value = value; + hash_add(cache->cache, &ci->hash, hash_32(key)); +} + +int cache_get(struct cache *cache, unsigned long key) +{ + struct cache_item *ci; + + hash_for_each_possible(cache->cache, ci, hash, hash_32(key)) { + if (ci->key == key) + return ci->value; + } + + return -1; +} + +void cache_init(struct cache *cache) +{ + hash_init(cache->cache); +} + +void cache_free(struct cache *cache) +{ + struct hlist_node *tmp; + struct cache_item *ci; + + hash_for_each_safe(cache->cache, ci, tmp, hash) { + free(ci); + } + + hash_init(cache->cache); +} diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c new file mode 100644 index 000000000000..66bd4c9bc952 --- /dev/null +++ b/scripts/gendwarfksyms/die.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include <string.h> +#include "gendwarfksyms.h" + +#define DIE_HASH_BITS 15 + +/* {die->addr, state} -> struct die * */ +static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS); + +static unsigned int map_hits; +static unsigned int map_misses; + +static inline unsigned int die_hash(uintptr_t addr, enum die_state state) +{ + return hash_32(addr_hash(addr) ^ (unsigned int)state); +} + +static void init_die(struct die *cd) +{ + cd->state = DIE_INCOMPLETE; + cd->mapped = false; + cd->fqn = NULL; + cd->tag = -1; + cd->addr = 0; + INIT_LIST_HEAD(&cd->fragments); +} + +static struct die *create_die(Dwarf_Die *die, enum die_state state) +{ + struct die *cd; + + cd = xmalloc(sizeof(struct die)); + init_die(cd); + cd->addr = (uintptr_t)die->addr; + + hash_add(die_map, &cd->hash, die_hash(cd->addr, state)); + return cd; +} + +int __die_map_get(uintptr_t addr, enum die_state state, struct die **res) +{ + struct die *cd; + + hash_for_each_possible(die_map, cd, hash, die_hash(addr, state)) { + if (cd->addr == addr && cd->state == state) { + *res = cd; + return 0; + } + } + + return -1; +} + +struct die *die_map_get(Dwarf_Die *die, enum die_state state) +{ + struct die *cd; + + if (__die_map_get((uintptr_t)die->addr, state, &cd) == 0) { + map_hits++; + return cd; + } + + map_misses++; + return create_die(die, state); +} + +static void reset_die(struct die *cd) +{ + struct die_fragment *tmp; + struct die_fragment *df; + + list_for_each_entry_safe(df, tmp, &cd->fragments, list) { + if (df->type == FRAGMENT_STRING) + free(df->data.str); + free(df); + } + + if (cd->fqn && *cd->fqn) + free(cd->fqn); + init_die(cd); +} + +void die_map_for_each(die_map_callback_t func, void *arg) +{ + struct hlist_node *tmp; + struct 
die *cd; + + hash_for_each_safe(die_map, cd, tmp, hash) { + func(cd, arg); + } +} + +void die_map_free(void) +{ + struct hlist_node *tmp; + unsigned int stats[DIE_LAST + 1]; + struct die *cd; + int i; + + memset(stats, 0, sizeof(stats)); + + hash_for_each_safe(die_map, cd, tmp, hash) { + stats[cd->state]++; + reset_die(cd); + free(cd); + } + hash_init(die_map); + + if (map_hits + map_misses > 0) + debug("hits %u, misses %u (hit rate %.02f%%)", map_hits, + map_misses, + (100.0f * map_hits) / (map_hits + map_misses)); + + for (i = 0; i <= DIE_LAST; i++) + debug("%s: %u entries", die_state_name(i), stats[i]); +} + +static struct die_fragment *append_item(struct die *cd) +{ + struct die_fragment *df; + + df = xmalloc(sizeof(struct die_fragment)); + df->type = FRAGMENT_EMPTY; + list_add_tail(&df->list, &cd->fragments); + return df; +} + +void die_map_add_string(struct die *cd, const char *str) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.str = xstrdup(str); + df->type = FRAGMENT_STRING; +} + +void die_map_add_linebreak(struct die *cd, int linebreak) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.linebreak = linebreak; + df->type = FRAGMENT_LINEBREAK; +} + +void die_map_add_die(struct die *cd, struct die *child) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.addr = child->addr; + df->type = FRAGMENT_DIE; +} diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c new file mode 100644 index 000000000000..534d9aa7c114 --- /dev/null +++ b/scripts/gendwarfksyms/dwarf.c @@ -0,0 +1,1159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include <assert.h> +#include <inttypes.h> +#include <stdarg.h> +#include "gendwarfksyms.h" + +/* See get_union_kabi_status */ +#define KABI_PREFIX "__kabi_" +#define KABI_PREFIX_LEN (sizeof(KABI_PREFIX) - 1) +#define KABI_RESERVED_PREFIX "reserved" +#define KABI_RESERVED_PREFIX_LEN (sizeof(KABI_RESERVED_PREFIX) - 1) +#define KABI_RENAMED_PREFIX "renamed" +#define KABI_RENAMED_PREFIX_LEN (sizeof(KABI_RENAMED_PREFIX) - 1) +#define KABI_IGNORED_PREFIX "ignored" +#define KABI_IGNORED_PREFIX_LEN (sizeof(KABI_IGNORED_PREFIX) - 1) + +static inline bool is_kabi_prefix(const char *name) +{ + return name && !strncmp(name, KABI_PREFIX, KABI_PREFIX_LEN); +} + +enum kabi_status { + /* >0 to stop DIE processing */ + KABI_NORMAL = 1, + KABI_RESERVED, + KABI_IGNORED, +}; + +static bool do_linebreak; +static int indentation_level; + +/* Line breaks and indentation for pretty-printing */ +static void process_linebreak(struct die *cache, int n) +{ + indentation_level += n; + do_linebreak = true; + die_map_add_linebreak(cache, n); +} + +#define DEFINE_GET_ATTR(attr, type) \ + static bool get_##attr##_attr(Dwarf_Die *die, unsigned int id, \ + type *value) \ + { \ + Dwarf_Attribute da; \ + return dwarf_attr(die, id, &da) && \ + !dwarf_form##attr(&da, value); \ + } + +DEFINE_GET_ATTR(flag, bool) +DEFINE_GET_ATTR(udata, Dwarf_Word) + +static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value) +{ + Dwarf_Attribute da; + + /* dwarf_formref_die returns a pointer instead of an error value. 
*/ + return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value); +} + +#define DEFINE_GET_STRING_ATTR(attr) \ + static const char *get_##attr##_attr(Dwarf_Die *die) \ + { \ + Dwarf_Attribute da; \ + if (dwarf_attr(die, DW_AT_##attr, &da)) \ + return dwarf_formstring(&da); \ + return NULL; \ + } + +DEFINE_GET_STRING_ATTR(name) +DEFINE_GET_STRING_ATTR(linkage_name) + +static const char *get_symbol_name(Dwarf_Die *die) +{ + const char *name; + + /* rustc uses DW_AT_linkage_name for exported symbols */ + name = get_linkage_name_attr(die); + if (!name) + name = get_name_attr(die); + + return name; +} + +static bool match_export_symbol(struct state *state, Dwarf_Die *die) +{ + Dwarf_Die *source = die; + Dwarf_Die origin; + + /* If the DIE has an abstract origin, use it for type information. */ + if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin)) + source = &origin; + + state->sym = symbol_get(get_symbol_name(die)); + + /* Look up using the origin name if there are no matches. */ + if (!state->sym && source != die) + state->sym = symbol_get(get_symbol_name(source)); + + state->die = *source; + return !!state->sym; +} + +/* DW_AT_decl_file -> struct srcfile */ +static struct cache srcfile_cache; + +static bool is_definition_private(Dwarf_Die *die) +{ + Dwarf_Word filenum; + Dwarf_Files *files; + Dwarf_Die cudie; + const char *s; + int res; + + /* + * Definitions in .c files cannot change the public ABI, + * so consider them private. + */ + if (!get_udata_attr(die, DW_AT_decl_file, &filenum)) + return false; + + res = cache_get(&srcfile_cache, filenum); + if (res >= 0) + return !!res; + + if (!dwarf_cu_die(die->cu, &cudie, NULL, NULL, NULL, NULL, NULL, NULL)) + error("dwarf_cu_die failed: '%s'", dwarf_errmsg(-1)); + + if (dwarf_getsrcfiles(&cudie, &files, NULL)) + error("dwarf_getsrcfiles failed: '%s'", dwarf_errmsg(-1)); + + s = dwarf_filesrc(files, filenum, NULL, NULL); + if (!s) + error("dwarf_filesrc failed: '%s'", dwarf_errmsg(-1)); + + s = strrchr(s, '.'); + res = s && !strcmp(s, ".c"); + cache_set(&srcfile_cache, filenum, res); + + return !!res; +} + +static bool is_kabi_definition(struct die *cache, Dwarf_Die *die) +{ + bool value; + + if (get_flag_attr(die, DW_AT_declaration, &value) && value) + return false; + + if (kabi_is_declonly(cache->fqn)) + return false; + + return !is_definition_private(die); +} + +/* + * Type string processing + */ +static void process(struct die *cache, const char *s) +{ + s = s ?: "<null>"; + + if (dump_dies && do_linebreak) { + fputs("\n", stderr); + for (int i = 0; i < indentation_level; i++) + fputs(" ", stderr); + do_linebreak = false; + } + if (dump_dies) + fputs(s, stderr); + + if (cache) + die_debug_r("cache %p string '%s'", cache, s); + die_map_add_string(cache, s); +} + +#define MAX_FMT_BUFFER_SIZE 128 + +static void process_fmt(struct die *cache, const char *fmt, ...) 
+{ + char buf[MAX_FMT_BUFFER_SIZE]; + va_list args; + + va_start(args, fmt); + + if (checkp(vsnprintf(buf, sizeof(buf), fmt, args)) >= sizeof(buf)) + error("vsnprintf overflow: increase MAX_FMT_BUFFER_SIZE"); + + process(cache, buf); + va_end(args); +} + +#define MAX_FQN_SIZE 64 + +/* Get a fully qualified name from DWARF scopes */ +static char *get_fqn(Dwarf_Die *die) +{ + const char *list[MAX_FQN_SIZE]; + Dwarf_Die *scopes = NULL; + bool has_name = false; + char *fqn = NULL; + char *p; + int count = 0; + int len = 0; + int res; + int i; + + res = checkp(dwarf_getscopes_die(die, &scopes)); + if (!res) { + list[count] = get_name_attr(die); + + if (!list[count]) + return NULL; + + len += strlen(list[count]); + count++; + + goto done; + } + + for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) { + if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit) + continue; + + list[count] = get_name_attr(&scopes[i]); + + if (list[count]) { + has_name = true; + } else { + list[count] = "<anonymous>"; + has_name = false; + } + + len += strlen(list[count]); + count++; + + if (i > 0) { + list[count++] = "::"; + len += 2; + } + } + + free(scopes); + + if (count == MAX_FQN_SIZE) + warn("increase MAX_FQN_SIZE: reached the maximum"); + + /* Consider the DIE unnamed if the last scope doesn't have a name */ + if (!has_name) + return NULL; +done: + fqn = xmalloc(len + 1); + *fqn = '\0'; + + p = fqn; + for (i = 0; i < count; i++) + p = stpcpy(p, list[i]); + + return fqn; +} + +static void update_fqn(struct die *cache, Dwarf_Die *die) +{ + if (!cache->fqn) + cache->fqn = get_fqn(die) ?: ""; +} + +static void process_fqn(struct die *cache, Dwarf_Die *die) +{ + update_fqn(cache, die); + if (*cache->fqn) + process(cache, " "); + process(cache, cache->fqn); +} + +#define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute) \ + static void process_##attribute##_attr(struct die *cache, \ + Dwarf_Die *die) \ + { \ + Dwarf_Word value; \ + if (get_udata_attr(die, DW_AT_##attribute, &value)) \ + process_fmt(cache, " " #attribute "(%" PRIu64 ")", \ + value); \ + } + +DEFINE_PROCESS_UDATA_ATTRIBUTE(accessibility) +DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment) +DEFINE_PROCESS_UDATA_ATTRIBUTE(bit_size) +DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size) +DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding) +DEFINE_PROCESS_UDATA_ATTRIBUTE(data_bit_offset) +DEFINE_PROCESS_UDATA_ATTRIBUTE(data_member_location) +DEFINE_PROCESS_UDATA_ATTRIBUTE(discr_value) + +/* Match functions -- die_match_callback_t */ +#define DEFINE_MATCH(type) \ + static bool match_##type##_type(Dwarf_Die *die) \ + { \ + return dwarf_tag(die) == DW_TAG_##type##_type; \ + } + +DEFINE_MATCH(enumerator) +DEFINE_MATCH(formal_parameter) +DEFINE_MATCH(member) +DEFINE_MATCH(subrange) + +bool match_all(Dwarf_Die *die) +{ + return true; +} + +int process_die_container(struct state *state, struct die *cache, + Dwarf_Die *die, die_callback_t func, + die_match_callback_t match) +{ + Dwarf_Die current; + int res; + + /* Track the first item in lists. 
*/ + if (state) + state->first_list_item = true; + + res = checkp(dwarf_child(die, ¤t)); + while (!res) { + if (match(¤t)) { + /* <0 = error, 0 = continue, >0 = stop */ + res = checkp(func(state, cache, ¤t)); + if (res) + goto out; + } + + res = checkp(dwarf_siblingof(¤t, ¤t)); + } + + res = 0; +out: + if (state) + state->first_list_item = false; + + return res; +} + +static int process_type(struct state *state, struct die *parent, + Dwarf_Die *die); + +static void process_type_attr(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + Dwarf_Die type; + + if (get_ref_die_attr(die, DW_AT_type, &type)) { + check(process_type(state, cache, &type)); + return; + } + + /* Compilers can omit DW_AT_type -- print out 'void' to clarify */ + process(cache, "base_type void"); +} + +static void process_list_comma(struct state *state, struct die *cache) +{ + if (state->first_list_item) { + state->first_list_item = false; + } else { + process(cache, " ,"); + process_linebreak(cache, 0); + } +} + +/* Comma-separated with DW_AT_type */ +static void __process_list_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + const char *name = get_name_attr(die); + + if (stable) { + if (is_kabi_prefix(name)) + name = NULL; + state->kabi.orig_name = NULL; + } + + process_list_comma(state, cache); + process(cache, type); + process_type_attr(state, cache, die); + + if (stable && state->kabi.orig_name) + name = state->kabi.orig_name; + if (name) { + process(cache, " "); + process(cache, name); + } + + process_accessibility_attr(cache, die); + process_bit_size_attr(cache, die); + process_data_bit_offset_attr(cache, die); + process_data_member_location_attr(cache, die); +} + +#define DEFINE_PROCESS_LIST_TYPE(type) \ + static void process_##type##_type(struct state *state, \ + struct die *cache, Dwarf_Die *die) \ + { \ + __process_list_type(state, cache, die, #type " "); \ + } + +DEFINE_PROCESS_LIST_TYPE(formal_parameter) +DEFINE_PROCESS_LIST_TYPE(member) + +/* Container types with DW_AT_type */ +static void __process_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + process(cache, type); + process_fqn(cache, die); + process(cache, " {"); + process_linebreak(cache, 1); + process_type_attr(state, cache, die); + process_linebreak(cache, -1); + process(cache, "}"); + process_byte_size_attr(cache, die); + process_alignment_attr(cache, die); +} + +#define DEFINE_PROCESS_TYPE(type) \ + static void process_##type##_type(struct state *state, \ + struct die *cache, Dwarf_Die *die) \ + { \ + __process_type(state, cache, die, #type "_type"); \ + } + +DEFINE_PROCESS_TYPE(atomic) +DEFINE_PROCESS_TYPE(const) +DEFINE_PROCESS_TYPE(immutable) +DEFINE_PROCESS_TYPE(packed) +DEFINE_PROCESS_TYPE(pointer) +DEFINE_PROCESS_TYPE(reference) +DEFINE_PROCESS_TYPE(restrict) +DEFINE_PROCESS_TYPE(rvalue_reference) +DEFINE_PROCESS_TYPE(shared) +DEFINE_PROCESS_TYPE(template_type_parameter) +DEFINE_PROCESS_TYPE(volatile) +DEFINE_PROCESS_TYPE(typedef) + +static void process_subrange_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + Dwarf_Word count = 0; + + if (get_udata_attr(die, DW_AT_count, &count)) + process_fmt(cache, "[%" PRIu64 "]", count); + else if (get_udata_attr(die, DW_AT_upper_bound, &count)) + process_fmt(cache, "[%" PRIu64 "]", count + 1); + else + process(cache, "[]"); +} + +static void process_array_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process(cache, "array_type"); + /* Array size */ + 
check(process_die_container(state, cache, die, process_type, + match_subrange_type)); + process(cache, " {"); + process_linebreak(cache, 1); + process_type_attr(state, cache, die); + process_linebreak(cache, -1); + process(cache, "}"); +} + +static void __process_subroutine_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + process(cache, type); + process(cache, " ("); + process_linebreak(cache, 1); + /* Parameters */ + check(process_die_container(state, cache, die, process_type, + match_formal_parameter_type)); + process_linebreak(cache, -1); + process(cache, ")"); + process_linebreak(cache, 0); + /* Return type */ + process(cache, "-> "); + process_type_attr(state, cache, die); +} + +static void process_subroutine_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + __process_subroutine_type(state, cache, die, "subroutine_type"); +} + +static void process_variant_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process_list_comma(state, cache); + process(cache, "variant {"); + process_linebreak(cache, 1); + check(process_die_container(state, cache, die, process_type, + match_member_type)); + process_linebreak(cache, -1); + process(cache, "}"); + process_discr_value_attr(cache, die); +} + +static void process_variant_part_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process_list_comma(state, cache); + process(cache, "variant_part {"); + process_linebreak(cache, 1); + check(process_die_container(state, cache, die, process_type, + match_all)); + process_linebreak(cache, -1); + process(cache, "}"); +} + +static int get_kabi_status(Dwarf_Die *die, const char **suffix) +{ + const char *name = get_name_attr(die); + + if (suffix) + *suffix = NULL; + + if (is_kabi_prefix(name)) { + name += KABI_PREFIX_LEN; + + if (!strncmp(name, KABI_RESERVED_PREFIX, + KABI_RESERVED_PREFIX_LEN)) + return KABI_RESERVED; + if (!strncmp(name, KABI_IGNORED_PREFIX, + KABI_IGNORED_PREFIX_LEN)) + return KABI_IGNORED; + + if (!strncmp(name, KABI_RENAMED_PREFIX, + KABI_RENAMED_PREFIX_LEN)) { + if (suffix) { + name += KABI_RENAMED_PREFIX_LEN; + *suffix = name; + } + return KABI_RESERVED; + } + } + + return KABI_NORMAL; +} + +static int check_struct_member_kabi_status(struct state *state, + struct die *__unused, Dwarf_Die *die) +{ + int res; + + assert(dwarf_tag(die) == DW_TAG_member_type); + + /* + * If the union member is a struct, expect the __kabi field to + * be the first member of the structure, i.e..: + * + * union { + * type new_member; + * struct { + * type __kabi_field; + * } + * }; + */ + res = get_kabi_status(die, &state->kabi.orig_name); + + if (res == KABI_RESERVED && + !get_ref_die_attr(die, DW_AT_type, &state->kabi.placeholder)) + error("structure member missing a type?"); + + return res; +} + +static int check_union_member_kabi_status(struct state *state, + struct die *__unused, Dwarf_Die *die) +{ + Dwarf_Die type; + int res; + + assert(dwarf_tag(die) == DW_TAG_member_type); + + if (!get_ref_die_attr(die, DW_AT_type, &type)) + error("union member missing a type?"); + + /* + * We expect a union with two members. Check if either of them + * has a __kabi name prefix, i.e.: + * + * union { + * ... + * type memberN; // <- type, N = {0,1} + * ... + * }; + * + * The member can also be a structure type, in which case we'll + * check the first structure member. + * + * In any case, stop processing after we've seen two members. 
+ */ + res = get_kabi_status(die, &state->kabi.orig_name); + + if (res == KABI_RESERVED) + state->kabi.placeholder = type; + if (res != KABI_NORMAL) + return res; + + if (dwarf_tag(&type) == DW_TAG_structure_type) + res = checkp(process_die_container( + state, NULL, &type, check_struct_member_kabi_status, + match_member_type)); + + if (res <= KABI_NORMAL && ++state->kabi.members < 2) + return 0; /* Continue */ + + return res; +} + +static int get_union_kabi_status(Dwarf_Die *die, Dwarf_Die *placeholder, + const char **orig_name) +{ + struct state state; + int res; + + if (!stable) + return KABI_NORMAL; + + /* + * To maintain a stable kABI, distributions may choose to reserve + * space in structs for later use by adding placeholder members, + * for example: + * + * struct s { + * u32 a; + * // an 8-byte placeholder for future use + * u64 __kabi_reserved_0; + * }; + * + * When the reserved member is taken into use, the type change + * would normally cause the symbol version to change as well, but + * if the replacement uses the following convention, gendwarfksyms + * continues to use the placeholder type for versioning instead, + * thus maintaining the same symbol version: + * + * struct s { + * u32 a; + * union { + * // placeholder replaced with a new member `b` + * struct t b; + * struct { + * // the placeholder type that is still + * // used for versioning + * u64 __kabi_reserved_0; + * }; + * }; + * }; + * + * I.e., as long as the replaced member is in a union, and the + * placeholder has a __kabi_reserved name prefix, we'll continue + * to use the placeholder type (here u64) for version calculation + * instead of the union type. + * + * It's also possible to ignore new members from versioning if + * they've been added to alignment holes, for example, by + * including them in a union with another member that uses the + * __kabi_ignored name prefix: + * + * struct s { + * u32 a; + * // an alignment hole is used to add `n` + * union { + * u32 n; + * // hide the entire union member from versioning + * u8 __kabi_ignored_0; + * }; + * u64 b; + * }; + * + * Note that the user of this feature is responsible for ensuring + * that the structure actually remains ABI compatible. 
+ */ + memset(&state.kabi, 0, sizeof(struct kabi_state)); + + res = checkp(process_die_container(&state, NULL, die, + check_union_member_kabi_status, + match_member_type)); + + if (res == KABI_RESERVED) { + if (placeholder) + *placeholder = state.kabi.placeholder; + if (orig_name) + *orig_name = state.kabi.orig_name; + } + + return res; +} + +static bool is_kabi_ignored(Dwarf_Die *die) +{ + Dwarf_Die type; + + if (!stable) + return false; + + if (!get_ref_die_attr(die, DW_AT_type, &type)) + error("member missing a type?"); + + return dwarf_tag(&type) == DW_TAG_union_type && + checkp(get_union_kabi_status(&type, NULL, NULL)) == KABI_IGNORED; +} + +static int ___process_structure_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + switch (dwarf_tag(die)) { + case DW_TAG_member: + if (is_kabi_ignored(die)) + return 0; + return check(process_type(state, cache, die)); + case DW_TAG_variant_part: + return check(process_type(state, cache, die)); + case DW_TAG_class_type: + case DW_TAG_enumeration_type: + case DW_TAG_structure_type: + case DW_TAG_template_type_parameter: + case DW_TAG_union_type: + case DW_TAG_subprogram: + /* Skip non-member types, including member functions */ + return 0; + default: + error("unexpected structure_type child: %x", dwarf_tag(die)); + } +} + +static void __process_structure_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type, + die_callback_t process_func, + die_match_callback_t match_func) +{ + bool expand; + + process(cache, type); + process_fqn(cache, die); + process(cache, " {"); + process_linebreak(cache, 1); + + expand = state->expand.expand && is_kabi_definition(cache, die); + + if (expand) { + state->expand.current_fqn = cache->fqn; + check(process_die_container(state, cache, die, process_func, + match_func)); + } + + process_linebreak(cache, -1); + process(cache, "}"); + + if (expand) { + process_byte_size_attr(cache, die); + process_alignment_attr(cache, die); + } +} + +#define DEFINE_PROCESS_STRUCTURE_TYPE(structure) \ + static void process_##structure##_type( \ + struct state *state, struct die *cache, Dwarf_Die *die) \ + { \ + __process_structure_type(state, cache, die, \ + #structure "_type", \ + ___process_structure_type, \ + match_all); \ + } + +DEFINE_PROCESS_STRUCTURE_TYPE(class) +DEFINE_PROCESS_STRUCTURE_TYPE(structure) + +static void process_union_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + Dwarf_Die placeholder; + + int res = checkp(get_union_kabi_status(die, &placeholder, + &state->kabi.orig_name)); + + if (res == KABI_RESERVED) + check(process_type(state, cache, &placeholder)); + if (res > KABI_NORMAL) + return; + + __process_structure_type(state, cache, die, "union_type", + ___process_structure_type, match_all); +} + +static void process_enumerator_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + bool overridden = false; + Dwarf_Word value; + + if (stable) { + /* Get the fqn before we process anything */ + update_fqn(cache, die); + + if (kabi_is_enumerator_ignored(state->expand.current_fqn, + cache->fqn)) + return; + + overridden = kabi_get_enumerator_value( + state->expand.current_fqn, cache->fqn, &value); + } + + process_list_comma(state, cache); + process(cache, "enumerator"); + process_fqn(cache, die); + + if (overridden || get_udata_attr(die, DW_AT_const_value, &value)) { + process(cache, " = "); + process_fmt(cache, "%" PRIu64, value); + } +} + +static void process_enumeration_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + 
__process_structure_type(state, cache, die, "enumeration_type", + process_type, match_enumerator_type); +} + +static void process_base_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process(cache, "base_type"); + process_fqn(cache, die); + process_byte_size_attr(cache, die); + process_encoding_attr(cache, die); + process_alignment_attr(cache, die); +} + +static void process_unspecified_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + /* + * These can be emitted for stand-alone assembly code, which means we + * might run into them in vmlinux.o. + */ + process(cache, "unspecified_type"); +} + +static void process_cached(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + struct die_fragment *df; + Dwarf_Die child; + + list_for_each_entry(df, &cache->fragments, list) { + switch (df->type) { + case FRAGMENT_STRING: + die_debug_b("cache %p STRING '%s'", cache, + df->data.str); + process(NULL, df->data.str); + break; + case FRAGMENT_LINEBREAK: + process_linebreak(NULL, df->data.linebreak); + break; + case FRAGMENT_DIE: + if (!dwarf_die_addr_die(dwarf_cu_getdwarf(die->cu), + (void *)df->data.addr, &child)) + error("dwarf_die_addr_die failed"); + die_debug_b("cache %p DIE addr %" PRIxPTR " tag %x", + cache, df->data.addr, dwarf_tag(&child)); + check(process_type(state, NULL, &child)); + break; + default: + error("empty die_fragment"); + } + } +} + +static void state_init(struct state *state) +{ + state->expand.expand = true; + state->expand.current_fqn = NULL; + cache_init(&state->expansion_cache); +} + +static void expansion_state_restore(struct expansion_state *state, + struct expansion_state *saved) +{ + state->expand = saved->expand; + state->current_fqn = saved->current_fqn; +} + +static void expansion_state_save(struct expansion_state *state, + struct expansion_state *saved) +{ + expansion_state_restore(saved, state); +} + +static bool is_expanded_type(int tag) +{ + return tag == DW_TAG_class_type || tag == DW_TAG_structure_type || + tag == DW_TAG_union_type || tag == DW_TAG_enumeration_type; +} + +#define PROCESS_TYPE(type) \ + case DW_TAG_##type##_type: \ + process_##type##_type(state, cache, die); \ + break; + +static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) +{ + enum die_state want_state = DIE_COMPLETE; + struct die *cache; + struct expansion_state saved; + int tag = dwarf_tag(die); + + expansion_state_save(&state->expand, &saved); + + /* + * Structures and enumeration types are expanded only once per + * exported symbol. This is sufficient for detecting ABI changes + * within the structure. + */ + if (is_expanded_type(tag)) { + if (cache_was_expanded(&state->expansion_cache, die->addr)) + state->expand.expand = false; + + if (state->expand.expand) + cache_mark_expanded(&state->expansion_cache, die->addr); + else + want_state = DIE_UNEXPANDED; + } + + /* + * If we have want_state already cached, use it instead of walking + * through DWARF. 
+ */ + cache = die_map_get(die, want_state); + + if (cache->state == want_state) { + die_debug_g("cached addr %p tag %x -- %s", die->addr, tag, + die_state_name(cache->state)); + + process_cached(state, cache, die); + die_map_add_die(parent, cache); + + expansion_state_restore(&state->expand, &saved); + return 0; + } + + die_debug_g("addr %p tag %x -- %s -> %s", die->addr, tag, + die_state_name(cache->state), die_state_name(want_state)); + + switch (tag) { + /* Type modifiers */ + PROCESS_TYPE(atomic) + PROCESS_TYPE(const) + PROCESS_TYPE(immutable) + PROCESS_TYPE(packed) + PROCESS_TYPE(pointer) + PROCESS_TYPE(reference) + PROCESS_TYPE(restrict) + PROCESS_TYPE(rvalue_reference) + PROCESS_TYPE(shared) + PROCESS_TYPE(volatile) + /* Container types */ + PROCESS_TYPE(class) + PROCESS_TYPE(structure) + PROCESS_TYPE(union) + PROCESS_TYPE(enumeration) + /* Subtypes */ + PROCESS_TYPE(enumerator) + PROCESS_TYPE(formal_parameter) + PROCESS_TYPE(member) + PROCESS_TYPE(subrange) + PROCESS_TYPE(template_type_parameter) + PROCESS_TYPE(variant) + PROCESS_TYPE(variant_part) + /* Other types */ + PROCESS_TYPE(array) + PROCESS_TYPE(base) + PROCESS_TYPE(subroutine) + PROCESS_TYPE(typedef) + PROCESS_TYPE(unspecified) + default: + error("unexpected type: %x", tag); + } + + die_debug_r("parent %p cache %p die addr %p tag %x", parent, cache, + die->addr, tag); + + /* Update cache state and append to the parent (if any) */ + cache->tag = tag; + cache->state = want_state; + die_map_add_die(parent, cache); + + expansion_state_restore(&state->expand, &saved); + return 0; +} + +/* + * Exported symbol processing + */ +static struct die *get_symbol_cache(struct state *state, Dwarf_Die *die) +{ + struct die *cache; + + cache = die_map_get(die, DIE_SYMBOL); + + if (cache->state != DIE_INCOMPLETE) + return NULL; /* We already processed a symbol for this DIE */ + + cache->tag = dwarf_tag(die); + return cache; +} + +static void process_symbol(struct state *state, Dwarf_Die *die, + die_callback_t process_func) +{ + struct die *cache; + + symbol_set_die(state->sym, die); + + cache = get_symbol_cache(state, die); + if (!cache) + return; + + debug("%s", state->sym->name); + check(process_func(state, cache, die)); + cache->state = DIE_SYMBOL; + if (dump_dies) + fputs("\n", stderr); +} + +static int __process_subprogram(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + __process_subroutine_type(state, cache, die, "subprogram"); + return 0; +} + +static void process_subprogram(struct state *state, Dwarf_Die *die) +{ + process_symbol(state, die, __process_subprogram); +} + +static int __process_variable(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process(cache, "variable "); + process_type_attr(state, cache, die); + return 0; +} + +static void process_variable(struct state *state, Dwarf_Die *die) +{ + process_symbol(state, die, __process_variable); +} + +static void save_symbol_ptr(struct state *state) +{ + Dwarf_Die ptr_type; + Dwarf_Die type; + + if (!get_ref_die_attr(&state->die, DW_AT_type, &ptr_type) || + dwarf_tag(&ptr_type) != DW_TAG_pointer_type) + error("%s must be a pointer type!", + get_symbol_name(&state->die)); + + if (!get_ref_die_attr(&ptr_type, DW_AT_type, &type)) + error("%s pointer missing a type attribute?", + get_symbol_name(&state->die)); + + /* + * Save the symbol pointer DIE in case the actual symbol is + * missing from the DWARF. Clang, for example, intentionally + * omits external symbols from the debugging information. 
+ */ + if (dwarf_tag(&type) == DW_TAG_subroutine_type) + symbol_set_ptr(state->sym, &type); + else + symbol_set_ptr(state->sym, &ptr_type); +} + +static int process_exported_symbols(struct state *unused, struct die *cache, + Dwarf_Die *die) +{ + int tag = dwarf_tag(die); + + switch (tag) { + /* Possible containers of exported symbols */ + case DW_TAG_namespace: + case DW_TAG_class_type: + case DW_TAG_structure_type: + return check(process_die_container( + NULL, cache, die, process_exported_symbols, match_all)); + + /* Possible exported symbols */ + case DW_TAG_subprogram: + case DW_TAG_variable: { + struct state state; + + if (!match_export_symbol(&state, die)) + return 0; + + state_init(&state); + + if (is_symbol_ptr(get_symbol_name(&state.die))) + save_symbol_ptr(&state); + else if (tag == DW_TAG_subprogram) + process_subprogram(&state, &state.die); + else + process_variable(&state, &state.die); + + cache_free(&state.expansion_cache); + return 0; + } + default: + return 0; + } +} + +static void process_symbol_ptr(struct symbol *sym, void *arg) +{ + struct state state; + Dwarf *dwarf = arg; + + if (sym->state != SYMBOL_UNPROCESSED || !sym->ptr_die_addr) + return; + + debug("%s", sym->name); + state_init(&state); + state.sym = sym; + + if (!dwarf_die_addr_die(dwarf, (void *)sym->ptr_die_addr, &state.die)) + error("dwarf_die_addr_die failed for symbol ptr: '%s'", + sym->name); + + if (dwarf_tag(&state.die) == DW_TAG_subroutine_type) + process_subprogram(&state, &state.die); + else + process_variable(&state, &state.die); + + cache_free(&state.expansion_cache); +} + +void process_cu(Dwarf_Die *cudie) +{ + check(process_die_container(NULL, NULL, cudie, process_exported_symbols, + match_all)); + + symbol_for_each(process_symbol_ptr, dwarf_cu_getdwarf(cudie->cu)); + + cache_free(&srcfile_cache); +} diff --git a/scripts/gendwarfksyms/examples/kabi.h b/scripts/gendwarfksyms/examples/kabi.h new file mode 100644 index 000000000000..97a5669b083d --- /dev/null +++ b/scripts/gendwarfksyms/examples/kabi.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Google LLC + * + * Example macros for maintaining kABI stability. + * + * This file is based on android_kabi.h, which has the following notice: + * + * Heavily influenced by rh_kabi.h which came from the RHEL/CENTOS kernel + * and was: + * Copyright (c) 2014 Don Zickus + * Copyright (c) 2015-2018 Jiri Benc + * Copyright (c) 2015 Sabrina Dubroca, Hannes Frederic Sowa + * Copyright (c) 2016-2018 Prarit Bhargava + * Copyright (c) 2017 Paolo Abeni, Larry Woodman + */ + +#ifndef __KABI_H__ +#define __KABI_H__ + +/* Kernel macros for userspace testing. */ +#ifndef __aligned +#define __aligned(x) __attribute__((__aligned__(x))) +#endif +#ifndef __used +#define __used __attribute__((__used__)) +#endif +#ifndef __section +#define __section(section) __attribute__((__section__(section))) +#endif +#ifndef __PASTE +#define ___PASTE(a, b) a##b +#define __PASTE(a, b) ___PASTE(a, b) +#endif +#ifndef __stringify +#define __stringify_1(x...) #x +#define __stringify(x...) 
__stringify_1(x) +#endif + +#define __KABI_RULE(hint, target, value) \ + static const char __PASTE(__gendwarfksyms_rule_, \ + __COUNTER__)[] __used __aligned(1) \ + __section(".discard.gendwarfksyms.kabi_rules") = \ + "1\0" #hint "\0" #target "\0" #value + +#define __KABI_NORMAL_SIZE_ALIGN(_orig, _new) \ + union { \ + _Static_assert( \ + sizeof(struct { _new; }) <= sizeof(struct { _orig; }), \ + __FILE__ ":" __stringify(__LINE__) ": " __stringify( \ + _new) " is larger than " __stringify(_orig)); \ + _Static_assert( \ + __alignof__(struct { _new; }) <= \ + __alignof__(struct { _orig; }), \ + __FILE__ ":" __stringify(__LINE__) ": " __stringify( \ + _orig) " is not aligned the same as " __stringify(_new)); \ + } + +#define __KABI_REPLACE(_orig, _new) \ + union { \ + _new; \ + struct { \ + _orig; \ + }; \ + __KABI_NORMAL_SIZE_ALIGN(_orig, _new); \ + } + +/* + * KABI_DECLONLY(fqn) + * Treat the struct/union/enum fqn as a declaration, i.e. even if + * a definition is available, don't expand the contents. + */ +#define KABI_DECLONLY(fqn) __KABI_RULE(declonly, fqn, ) + +/* + * KABI_ENUMERATOR_IGNORE(fqn, field) + * When expanding enum fqn, skip the provided field. This makes it + * possible to hide added enum fields from versioning. + */ +#define KABI_ENUMERATOR_IGNORE(fqn, field) \ + __KABI_RULE(enumerator_ignore, fqn field, ) + +/* + * KABI_ENUMERATOR_VALUE(fqn, field, value) + * When expanding enum fqn, use the provided value for the + * specified field. This makes it possible to override enumerator + * values when calculating versions. + */ +#define KABI_ENUMERATOR_VALUE(fqn, field, value) \ + __KABI_RULE(enumerator_value, fqn field, value) + +/* + * KABI_RESERVE + * Reserve some "padding" in a structure for use by LTS backports. + * This is normally placed at the end of a structure. + * number: the "number" of the padding variable in the structure. Start with + * 1 and go up. + */ +#define KABI_RESERVE(n) unsigned long __kabi_reserved##n + +/* + * KABI_RESERVE_ARRAY + * Same as KABI_RESERVE but allocates an array with the specified + * size in bytes. + */ +#define KABI_RESERVE_ARRAY(n, s) \ + unsigned char __aligned(8) __kabi_reserved##n[s] + +/* + * KABI_IGNORE + * Add a new field that's ignored in versioning. + */ +#define KABI_IGNORE(n, _new) \ + union { \ + _new; \ + unsigned char __kabi_ignored##n; \ + } + +/* + * KABI_REPLACE + * Replace a field with a compatible new field. + */ +#define KABI_REPLACE(_oldtype, _oldname, _new) \ + __KABI_REPLACE(_oldtype __kabi_renamed##_oldname, struct { _new; }) + +/* + * KABI_USE(number, _new) + * Use a previous padding entry that was defined with KABI_RESERVE + * number: the previous "number" of the padding variable + * _new: the variable to use now instead of the padding variable + */ +#define KABI_USE(number, _new) __KABI_REPLACE(KABI_RESERVE(number), _new) + +/* + * KABI_USE2(number, _new1, _new2) + * Use a previous padding entry that was defined with KABI_RESERVE for + * two new variables that fit into 64 bits. This is good for when you do not + * want to "burn" a 64bit padding variable for a smaller variable size if not + * needed. 
+ */ +#define KABI_USE2(number, _new1, _new2) \ + __KABI_REPLACE( \ + KABI_RESERVE(number), struct { \ + _new1; \ + _new2; \ + }) +/* + * KABI_USE_ARRAY(number, bytes, _new) + * Use a previous padding entry that was defined with KABI_RESERVE_ARRAY + * number: the previous "number" of the padding variable + * bytes: the size in bytes reserved for the array + * _new: the variable to use now instead of the padding variable + */ +#define KABI_USE_ARRAY(number, bytes, _new) \ + __KABI_REPLACE(KABI_RESERVE_ARRAY(number, bytes), _new) + +#endif /* __KABI_H__ */ diff --git a/scripts/gendwarfksyms/examples/kabi_ex.c b/scripts/gendwarfksyms/examples/kabi_ex.c new file mode 100644 index 000000000000..0b7ffd830541 --- /dev/null +++ b/scripts/gendwarfksyms/examples/kabi_ex.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * kabi_ex.c + * + * Copyright (C) 2024 Google LLC + * + * Examples for kABI stability features with --stable. See kabi_ex.h + * for details. + */ + +#include "kabi_ex.h" + +struct s e0; +enum e e1; + +struct ex0a ex0a; +struct ex0b ex0b; +struct ex0c ex0c; + +struct ex1a ex1a; +struct ex1b ex1b; +struct ex1c ex1c; + +struct ex2a ex2a; +struct ex2b ex2b; +struct ex2c ex2c; + +struct ex3a ex3a; +struct ex3b ex3b; +struct ex3c ex3c; diff --git a/scripts/gendwarfksyms/examples/kabi_ex.h b/scripts/gendwarfksyms/examples/kabi_ex.h new file mode 100644 index 000000000000..1736e0f65208 --- /dev/null +++ b/scripts/gendwarfksyms/examples/kabi_ex.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * kabi_ex.h + * + * Copyright (C) 2024 Google LLC + * + * Examples for kABI stability features with --stable. + */ + +/* + * The comments below each example contain the expected gendwarfksyms + * output, which can be verified using LLVM's FileCheck tool: + * + * https://llvm.org/docs/CommandGuide/FileCheck.html + * + * Usage: + * + * $ gcc -g -c examples/kabi_ex.c -o examples/kabi_ex.o + * + * $ nm examples/kabi_ex.o | awk '{ print $NF }' | \ + * ./gendwarfksyms --stable --dump-dies \ + * examples/kabi_ex.o 2>&1 >/dev/null | \ + * FileCheck examples/kabi_ex.h --check-prefix=STABLE + */ + +#ifndef __KABI_EX_H__ +#define __KABI_EX_H__ + +#include "kabi.h" + +/* + * Example: kABI rules + */ + +struct s { + int a; +}; + +KABI_DECLONLY(s); + +/* + * STABLE: variable structure_type s { + * STABLE-NEXT: } + */ + +enum e { + A, + B, + C, + D, +}; + +KABI_ENUMERATOR_IGNORE(e, B); +KABI_ENUMERATOR_IGNORE(e, C); +KABI_ENUMERATOR_VALUE(e, D, 123456789); + +/* + * STABLE: variable enumeration_type e { + * STABLE-NEXT: enumerator A = 0 , + * STABLE-NEXT: enumerator D = 123456789 + * STABLE-NEXT: } byte_size(4) +*/ + +/* + * Example: Reserved fields + */ +struct ex0a { + int a; + KABI_RESERVE(0); + KABI_RESERVE(1); +}; + +/* + * STABLE: variable structure_type ex0a { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) data_member_location(8) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(16) + * STABLE-NEXT: } byte_size(24) + */ + +struct ex0b { + int a; + KABI_RESERVE(0); + KABI_USE2(1, int b, int c); +}; + +/* + * STABLE: variable structure_type ex0b { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(8) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) 
data_member_location(16) + * STABLE-NEXT: } byte_size(24) + */ + +struct ex0c { + int a; + KABI_USE(0, void *p); + KABI_USE2(1, int b, int c); +}; + +/* + * STABLE: variable structure_type ex0c { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(8) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(16) + * STABLE-NEXT: } byte_size(24) + */ + +/* + * Example: A reserved array + */ + +struct ex1a { + unsigned int a; + KABI_RESERVE_ARRAY(0, 64); +}; + +/* + * STABLE: variable structure_type ex1a { + * STABLE-NEXT: member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) , + * STABLE-NEXT: member array_type[64] { + * STABLE-NEXT: base_type unsigned char byte_size(1) encoding(8) + * STABLE-NEXT: } data_member_location(8) + * STABLE-NEXT: } byte_size(72) + */ + +struct ex1b { + unsigned int a; + KABI_USE_ARRAY( + 0, 64, struct { + void *p; + KABI_RESERVE_ARRAY(1, 56); + }); +}; + +/* + * STABLE: variable structure_type ex1b { + * STABLE-NEXT: member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) , + * STABLE-NEXT: member array_type[64] { + * STABLE-NEXT: base_type unsigned char byte_size(1) encoding(8) + * STABLE-NEXT: } data_member_location(8) + * STABLE-NEXT: } byte_size(72) + */ + +struct ex1c { + unsigned int a; + KABI_USE_ARRAY(0, 64, void *p[8]); +}; + +/* + * STABLE: variable structure_type ex1c { + * STABLE-NEXT: member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) , + * STABLE-NEXT: member array_type[64] { + * STABLE-NEXT: base_type unsigned char byte_size(1) encoding(8) + * STABLE-NEXT: } data_member_location(8) + * STABLE-NEXT: } byte_size(72) + */ + +/* + * Example: An ignored field added to an alignment hole + */ + +struct ex2a { + int a; + unsigned long b; + int c; + unsigned long d; +}; + +/* + * STABLE: variable structure_type ex2a { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) b data_member_location(8) + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) c data_member_location(16) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24) + * STABLE-NEXT: } byte_size(32) + */ + +struct ex2b { + int a; + KABI_IGNORE(0, unsigned int n); + unsigned long b; + int c; + unsigned long d; +}; + +_Static_assert(sizeof(struct ex2a) == sizeof(struct ex2b), "ex2a size doesn't match ex2b"); + +/* + * STABLE: variable structure_type ex2b { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) b data_member_location(8) + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) c data_member_location(16) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24) + * STABLE-NEXT: } byte_size(32) + */ + +struct ex2c { + int a; + KABI_IGNORE(0, unsigned int n); + unsigned long b; + int c; + KABI_IGNORE(1, unsigned int m); + unsigned long d; +}; + +_Static_assert(sizeof(struct ex2a) == sizeof(struct ex2c), "ex2a size doesn't match ex2c"); + +/* + * STABLE: variable structure_type ex2c { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type 
[[ULONG]] byte_size(8) encoding(7) b data_member_location(8) + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) c data_member_location(16) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24) + * STABLE-NEXT: } byte_size(32) + */ + + +/* + * Example: A replaced field + */ + +struct ex3a { + unsigned long a; + unsigned long unused; +}; + +/* + * STABLE: variable structure_type ex3a { + * STABLE-NEXT: member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) a data_member_location(0) + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8) + * STABLE-NEXT: } byte_size(16) + */ + +struct ex3b { + unsigned long a; + KABI_REPLACE(unsigned long, unused, unsigned long renamed); +}; + +_Static_assert(sizeof(struct ex3a) == sizeof(struct ex3b), "ex3a size doesn't match ex3b"); + +/* + * STABLE: variable structure_type ex3b { + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) a data_member_location(0) + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8) + * STABLE-NEXT: } byte_size(16) + */ + +struct ex3c { + unsigned long a; + KABI_REPLACE(unsigned long, unused, long replaced); +}; + +_Static_assert(sizeof(struct ex3a) == sizeof(struct ex3c), "ex3a size doesn't match ex3c"); + +/* + * STABLE: variable structure_type ex3c { + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) a data_member_location(0) + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8) + * STABLE-NEXT: } byte_size(16) + */ + +#endif /* __KABI_EX_H__ */ diff --git a/scripts/gendwarfksyms/examples/symbolptr.c b/scripts/gendwarfksyms/examples/symbolptr.c new file mode 100644 index 000000000000..88bc1bd60da8 --- /dev/null +++ b/scripts/gendwarfksyms/examples/symbolptr.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + * + * Example for symbol pointers. When compiled with Clang, gendwarfkyms + * uses a symbol pointer for `f`. + * + * $ clang -g -c examples/symbolptr.c -o examples/symbolptr.o + * $ echo -e "f\ng\np" | ./gendwarfksyms -d examples/symbolptr.o + */ + +/* Kernel macros for userspace testing. */ +#ifndef __used +#define __used __attribute__((__used__)) +#endif +#ifndef __section +#define __section(section) __attribute__((__section__(section))) +#endif + +#define __GENDWARFKSYMS_EXPORT(sym) \ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ + __section(".discard.gendwarfksyms") = &sym; + +extern void f(unsigned int arg); +void g(int *arg); +void g(int *arg) {} + +struct s; +extern struct s *p; + +__GENDWARFKSYMS_EXPORT(f); +__GENDWARFKSYMS_EXPORT(g); +__GENDWARFKSYMS_EXPORT(p); diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c new file mode 100644 index 000000000000..08ae61eb327e --- /dev/null +++ b/scripts/gendwarfksyms/gendwarfksyms.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include <fcntl.h> +#include <getopt.h> +#include <errno.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include "gendwarfksyms.h" + +/* + * Options + */ + +/* Print debugging information to stderr */ +int debug; +/* Dump DIE contents */ +int dump_dies; +/* Print debugging information about die_map changes */ +int dump_die_map; +/* Print out type strings (i.e. 
type_map) */ +int dump_types; +/* Print out expanded type strings used for symbol versions */ +int dump_versions; +/* Support kABI stability features */ +int stable; +/* Write a symtypes file */ +int symtypes; +static const char *symtypes_file; + +static void usage(void) +{ + fputs("Usage: gendwarfksyms [options] elf-object-file ... < symbol-list\n\n" + "Options:\n" + " -d, --debug Print debugging information\n" + " --dump-dies Dump DWARF DIE contents\n" + " --dump-die-map Print debugging information about die_map changes\n" + " --dump-types Dump type strings\n" + " --dump-versions Dump expanded type strings used for symbol versions\n" + " -s, --stable Support kABI stability features\n" + " -T, --symtypes file Write a symtypes file\n" + " -h, --help Print this message\n" + "\n", + stderr); +} + +static int process_module(Dwfl_Module *mod, void **userdata, const char *name, + Dwarf_Addr base, void *arg) +{ + Dwarf_Addr dwbias; + Dwarf_Die cudie; + Dwarf_CU *cu = NULL; + Dwarf *dbg; + FILE *symfile = arg; + int res; + + debug("%s", name); + dbg = dwfl_module_getdwarf(mod, &dwbias); + + /* + * Look for exported symbols in each CU, follow the DIE tree, and add + * the entries to die_map. + */ + do { + res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL); + if (res < 0) + error("dwarf_get_units failed: no debugging information?"); + if (res == 1) + break; /* No more units */ + + process_cu(&cudie); + } while (cu); + + /* + * Use die_map to expand type strings, write them to `symfile`, and + * calculate symbol versions. + */ + generate_symtypes_and_versions(symfile); + die_map_free(); + + return DWARF_CB_OK; +} + +static const Dwfl_Callbacks callbacks = { + .section_address = dwfl_offline_section_address, + .find_debuginfo = dwfl_standard_find_debuginfo, +}; + +int main(int argc, char **argv) +{ + FILE *symfile = NULL; + unsigned int n; + int opt; + + static const struct option opts[] = { + { "debug", 0, NULL, 'd' }, + { "dump-dies", 0, &dump_dies, 1 }, + { "dump-die-map", 0, &dump_die_map, 1 }, + { "dump-types", 0, &dump_types, 1 }, + { "dump-versions", 0, &dump_versions, 1 }, + { "stable", 0, NULL, 's' }, + { "symtypes", 1, NULL, 'T' }, + { "help", 0, NULL, 'h' }, + { 0, 0, NULL, 0 } + }; + + while ((opt = getopt_long(argc, argv, "dsT:h", opts, NULL)) != EOF) { + switch (opt) { + case 0: + break; + case 'd': + debug = 1; + break; + case 's': + stable = 1; + break; + case 'T': + symtypes = 1; + symtypes_file = optarg; + break; + case 'h': + usage(); + return 0; + default: + usage(); + return 1; + } + } + + if (dump_die_map) + dump_dies = 1; + + if (optind >= argc) { + usage(); + error("no input files?"); + } + + symbol_read_exports(stdin); + + if (symtypes_file) { + symfile = fopen(symtypes_file, "w"); + if (!symfile) + error("fopen failed for '%s': %s", symtypes_file, + strerror(errno)); + } + + for (n = optind; n < argc; n++) { + Dwfl *dwfl; + int fd; + + fd = open(argv[n], O_RDONLY); + if (fd == -1) + error("open failed for '%s': %s", argv[n], + strerror(errno)); + + symbol_read_symtab(fd); + kabi_read_rules(fd); + + dwfl = dwfl_begin(&callbacks); + if (!dwfl) + error("dwfl_begin failed for '%s': %s", argv[n], + dwarf_errmsg(-1)); + + if (!dwfl_report_offline(dwfl, argv[n], argv[n], fd)) + error("dwfl_report_offline failed for '%s': %s", + argv[n], dwarf_errmsg(-1)); + + dwfl_report_end(dwfl, NULL, NULL); + + if (dwfl_getmodules(dwfl, &process_module, symfile, 0)) + error("dwfl_getmodules failed for '%s'", argv[n]); + + dwfl_end(dwfl); + kabi_free(); + } + + if (symfile) + 
check(fclose(symfile)); + + symbol_print_versions(); + symbol_free(); + + return 0; +} diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h new file mode 100644 index 000000000000..197a1a8123c6 --- /dev/null +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -0,0 +1,296 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Google LLC + */ + +#include <dwarf.h> +#include <elfutils/libdw.h> +#include <elfutils/libdwfl.h> +#include <stdlib.h> +#include <stdio.h> + +#include <hash.h> +#include <hashtable.h> +#include <xalloc.h> + +#ifndef __GENDWARFKSYMS_H +#define __GENDWARFKSYMS_H + +/* + * Options -- in gendwarfksyms.c + */ +extern int debug; +extern int dump_dies; +extern int dump_die_map; +extern int dump_types; +extern int dump_versions; +extern int stable; +extern int symtypes; + +/* + * Output helpers + */ +#define __PREFIX "gendwarfksyms: " +#define __println(prefix, format, ...) \ + fprintf(stderr, prefix __PREFIX "%s: " format "\n", __func__, \ + ##__VA_ARGS__) + +#define debug(format, ...) \ + do { \ + if (debug) \ + __println("", format, ##__VA_ARGS__); \ + } while (0) + +#define warn(format, ...) __println("warning: ", format, ##__VA_ARGS__) +#define error(format, ...) \ + do { \ + __println("error: ", format, ##__VA_ARGS__); \ + exit(1); \ + } while (0) + +#define __die_debug(color, format, ...) \ + do { \ + if (dump_dies && dump_die_map) \ + fprintf(stderr, \ + "\033[" #color "m<" format ">\033[39m", \ + __VA_ARGS__); \ + } while (0) + +#define die_debug_r(format, ...) __die_debug(91, format, __VA_ARGS__) +#define die_debug_g(format, ...) __die_debug(92, format, __VA_ARGS__) +#define die_debug_b(format, ...) __die_debug(94, format, __VA_ARGS__) + +/* + * Error handling helpers + */ +#define __check(expr, test) \ + ({ \ + int __res = expr; \ + if (test) \ + error("`%s` failed: %d", #expr, __res); \ + __res; \ + }) + +/* Error == non-zero values */ +#define check(expr) __check(expr, __res) +/* Error == negative values */ +#define checkp(expr) __check(expr, __res < 0) + +/* Consistent aliases (DW_TAG_<type>_type) for DWARF tags */ +#define DW_TAG_enumerator_type DW_TAG_enumerator +#define DW_TAG_formal_parameter_type DW_TAG_formal_parameter +#define DW_TAG_member_type DW_TAG_member +#define DW_TAG_template_type_parameter_type DW_TAG_template_type_parameter +#define DW_TAG_typedef_type DW_TAG_typedef +#define DW_TAG_variant_part_type DW_TAG_variant_part +#define DW_TAG_variant_type DW_TAG_variant + +/* + * symbols.c + */ + +/* See symbols.c:is_symbol_ptr */ +#define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_" +#define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1) + +static inline unsigned int addr_hash(uintptr_t addr) +{ + return hash_ptr((const void *)addr); +} + +enum symbol_state { + SYMBOL_UNPROCESSED, + SYMBOL_MAPPED, + SYMBOL_PROCESSED +}; + +struct symbol_addr { + uint32_t section; + Elf64_Addr address; +}; + +struct symbol { + const char *name; + struct symbol_addr addr; + struct hlist_node addr_hash; + struct hlist_node name_hash; + enum symbol_state state; + uintptr_t die_addr; + uintptr_t ptr_die_addr; + unsigned long crc; +}; + +typedef void (*symbol_callback_t)(struct symbol *, void *arg); + +bool is_symbol_ptr(const char *name); +void symbol_read_exports(FILE *file); +void symbol_read_symtab(int fd); +struct symbol *symbol_get(const char *name); +void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr); +void symbol_set_die(struct symbol *sym, Dwarf_Die *die); +void symbol_set_crc(struct symbol *sym, unsigned long 
crc); +void symbol_for_each(symbol_callback_t func, void *arg); +void symbol_print_versions(void); +void symbol_free(void); + +/* + * die.c + */ + +enum die_state { + DIE_INCOMPLETE, + DIE_UNEXPANDED, + DIE_COMPLETE, + DIE_SYMBOL, + DIE_LAST = DIE_SYMBOL +}; + +enum die_fragment_type { + FRAGMENT_EMPTY, + FRAGMENT_STRING, + FRAGMENT_LINEBREAK, + FRAGMENT_DIE +}; + +struct die_fragment { + enum die_fragment_type type; + union { + char *str; + int linebreak; + uintptr_t addr; + } data; + struct list_head list; +}; + +#define CASE_CONST_TO_STR(name) \ + case name: \ + return #name; + +static inline const char *die_state_name(enum die_state state) +{ + switch (state) { + CASE_CONST_TO_STR(DIE_INCOMPLETE) + CASE_CONST_TO_STR(DIE_UNEXPANDED) + CASE_CONST_TO_STR(DIE_COMPLETE) + CASE_CONST_TO_STR(DIE_SYMBOL) + } + + error("unexpected die_state: %d", state); +} + +struct die { + enum die_state state; + bool mapped; + char *fqn; + int tag; + uintptr_t addr; + struct list_head fragments; + struct hlist_node hash; +}; + +typedef void (*die_map_callback_t)(struct die *, void *arg); + +int __die_map_get(uintptr_t addr, enum die_state state, struct die **res); +struct die *die_map_get(Dwarf_Die *die, enum die_state state); +void die_map_add_string(struct die *pd, const char *str); +void die_map_add_linebreak(struct die *pd, int linebreak); +void die_map_for_each(die_map_callback_t func, void *arg); +void die_map_add_die(struct die *pd, struct die *child); +void die_map_free(void); + +/* + * cache.c + */ + +#define CACHE_HASH_BITS 10 + +/* A cache for addresses we've already seen. */ +struct cache { + HASHTABLE_DECLARE(cache, 1 << CACHE_HASH_BITS); +}; + +void cache_set(struct cache *cache, unsigned long key, int value); +int cache_get(struct cache *cache, unsigned long key); +void cache_init(struct cache *cache); +void cache_free(struct cache *cache); + +static inline void __cache_mark_expanded(struct cache *cache, uintptr_t addr) +{ + cache_set(cache, addr, 1); +} + +static inline bool __cache_was_expanded(struct cache *cache, uintptr_t addr) +{ + return cache_get(cache, addr) == 1; +} + +static inline void cache_mark_expanded(struct cache *cache, void *addr) +{ + __cache_mark_expanded(cache, (uintptr_t)addr); +} + +static inline bool cache_was_expanded(struct cache *cache, void *addr) +{ + return __cache_was_expanded(cache, (uintptr_t)addr); +} + +/* + * dwarf.c + */ + +struct expansion_state { + bool expand; + const char *current_fqn; +}; + +struct kabi_state { + int members; + Dwarf_Die placeholder; + const char *orig_name; +}; + +struct state { + struct symbol *sym; + Dwarf_Die die; + + /* List expansion */ + bool first_list_item; + + /* Structure expansion */ + struct expansion_state expand; + struct cache expansion_cache; + + /* Reserved or ignored members */ + struct kabi_state kabi; +}; + +typedef int (*die_callback_t)(struct state *state, struct die *cache, + Dwarf_Die *die); +typedef bool (*die_match_callback_t)(Dwarf_Die *die); +bool match_all(Dwarf_Die *die); + +int process_die_container(struct state *state, struct die *cache, + Dwarf_Die *die, die_callback_t func, + die_match_callback_t match); + +void process_cu(Dwarf_Die *cudie); + +/* + * types.c + */ + +void generate_symtypes_and_versions(FILE *file); + +/* + * kabi.c + */ + +bool kabi_is_enumerator_ignored(const char *fqn, const char *field); +bool kabi_get_enumerator_value(const char *fqn, const char *field, + unsigned long *value); +bool kabi_is_declonly(const char *fqn); + +void kabi_read_rules(int fd); +void kabi_free(void); + 
+#endif /* __GENDWARFKSYMS_H */ diff --git a/scripts/gendwarfksyms/kabi.c b/scripts/gendwarfksyms/kabi.c new file mode 100644 index 000000000000..66f01fcd1607 --- /dev/null +++ b/scripts/gendwarfksyms/kabi.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <stdio.h> + +#include "gendwarfksyms.h" + +#define KABI_RULE_SECTION ".discard.gendwarfksyms.kabi_rules" +#define KABI_RULE_VERSION "1" + +/* + * The rule section consists of four null-terminated strings per + * entry: + * + * 1. version + * Entry format version. Must match KABI_RULE_VERSION. + * + * 2. type + * Type of the kABI rule. Must be one of the tags defined below. + * + * 3. target + * Rule-dependent target, typically the fully qualified name of + * the target DIE. + * + * 4. value + * Rule-dependent value. + */ +#define KABI_RULE_MIN_ENTRY_SIZE \ + (/* version\0 */ 2 + /* type\0 */ 2 + /* target\0" */ 1 + \ + /* value\0 */ 1) +#define KABI_RULE_EMPTY_VALUE "" + +/* + * Rule: declonly + * - For the struct/enum/union in the target field, treat it as a + * declaration only even if a definition is available. + */ +#define KABI_RULE_TAG_DECLONLY "declonly" + +/* + * Rule: enumerator_ignore + * - For the enum_field in the target field, ignore the enumerator. + */ +#define KABI_RULE_TAG_ENUMERATOR_IGNORE "enumerator_ignore" + +/* + * Rule: enumerator_value + * - For the fqn_field in the target field, set the value to the + * unsigned integer in the value field. + */ +#define KABI_RULE_TAG_ENUMERATOR_VALUE "enumerator_value" + +enum kabi_rule_type { + KABI_RULE_TYPE_UNKNOWN, + KABI_RULE_TYPE_DECLONLY, + KABI_RULE_TYPE_ENUMERATOR_IGNORE, + KABI_RULE_TYPE_ENUMERATOR_VALUE, +}; + +#define RULE_HASH_BITS 7 + +struct rule { + enum kabi_rule_type type; + const char *target; + const char *value; + struct hlist_node hash; +}; + +/* { type, target } -> struct rule */ +static HASHTABLE_DEFINE(rules, 1 << RULE_HASH_BITS); + +static inline unsigned int rule_values_hash(enum kabi_rule_type type, + const char *target) +{ + return hash_32(type) ^ hash_str(target); +} + +static inline unsigned int rule_hash(const struct rule *rule) +{ + return rule_values_hash(rule->type, rule->target); +} + +static inline const char *get_rule_field(const char **pos, ssize_t *left) +{ + const char *start = *pos; + size_t len; + + if (*left <= 0) + error("unexpected end of kABI rules"); + + len = strnlen(start, *left) + 1; + *pos += len; + *left -= len; + + return start; +} + +void kabi_read_rules(int fd) +{ + GElf_Shdr shdr_mem; + GElf_Shdr *shdr; + Elf_Data *rule_data = NULL; + Elf_Scn *scn; + Elf *elf; + size_t shstrndx; + const char *rule_str; + ssize_t left; + int i; + + const struct { + enum kabi_rule_type type; + const char *tag; + } rule_types[] = { + { + .type = KABI_RULE_TYPE_DECLONLY, + .tag = KABI_RULE_TAG_DECLONLY, + }, + { + .type = KABI_RULE_TYPE_ENUMERATOR_IGNORE, + .tag = KABI_RULE_TAG_ENUMERATOR_IGNORE, + }, + { + .type = KABI_RULE_TYPE_ENUMERATOR_VALUE, + .tag = KABI_RULE_TAG_ENUMERATOR_VALUE, + }, + }; + + if (!stable) + return; + + if (elf_version(EV_CURRENT) != EV_CURRENT) + error("elf_version failed: %s", elf_errmsg(-1)); + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) + error("elf_begin failed: %s", elf_errmsg(-1)); + + if (elf_getshdrstrndx(elf, &shstrndx) < 0) + error("elf_getshdrstrndx failed: %s", elf_errmsg(-1)); + + scn = elf_nextscn(elf, NULL); + + while (scn) { + const char *sname; + + shdr = gelf_getshdr(scn, &shdr_mem); + 
if (!shdr) + error("gelf_getshdr failed: %s", elf_errmsg(-1)); + + sname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!sname) + error("elf_strptr failed: %s", elf_errmsg(-1)); + + if (!strcmp(sname, KABI_RULE_SECTION)) { + rule_data = elf_getdata(scn, NULL); + if (!rule_data) + error("elf_getdata failed: %s", elf_errmsg(-1)); + break; + } + + scn = elf_nextscn(elf, scn); + } + + if (!rule_data) { + debug("kABI rules not found"); + check(elf_end(elf)); + return; + } + + rule_str = rule_data->d_buf; + left = shdr->sh_size; + + if (left < KABI_RULE_MIN_ENTRY_SIZE) + error("kABI rule section too small: %zd bytes", left); + + if (rule_str[left - 1] != '\0') + error("kABI rules are not null-terminated"); + + while (left > KABI_RULE_MIN_ENTRY_SIZE) { + enum kabi_rule_type type = KABI_RULE_TYPE_UNKNOWN; + const char *field; + struct rule *rule; + + /* version */ + field = get_rule_field(&rule_str, &left); + + if (strcmp(field, KABI_RULE_VERSION)) + error("unsupported kABI rule version: '%s'", field); + + /* type */ + field = get_rule_field(&rule_str, &left); + + for (i = 0; i < ARRAY_SIZE(rule_types); i++) { + if (!strcmp(field, rule_types[i].tag)) { + type = rule_types[i].type; + break; + } + } + + if (type == KABI_RULE_TYPE_UNKNOWN) + error("unsupported kABI rule type: '%s'", field); + + rule = xmalloc(sizeof(struct rule)); + + rule->type = type; + rule->target = xstrdup(get_rule_field(&rule_str, &left)); + rule->value = xstrdup(get_rule_field(&rule_str, &left)); + + hash_add(rules, &rule->hash, rule_hash(rule)); + + debug("kABI rule: type: '%s', target: '%s', value: '%s'", field, + rule->target, rule->value); + } + + if (left > 0) + warn("unexpected data at the end of the kABI rules section"); + + check(elf_end(elf)); +} + +bool kabi_is_declonly(const char *fqn) +{ + struct rule *rule; + + if (!stable) + return false; + if (!fqn || !*fqn) + return false; + + hash_for_each_possible(rules, rule, hash, + rule_values_hash(KABI_RULE_TYPE_DECLONLY, fqn)) { + if (rule->type == KABI_RULE_TYPE_DECLONLY && + !strcmp(fqn, rule->target)) + return true; + } + + return false; +} + +static char *get_enumerator_target(const char *fqn, const char *field) +{ + char *target = NULL; + + if (asprintf(&target, "%s %s", fqn, field) < 0) + error("asprintf failed for '%s %s'", fqn, field); + + return target; +} + +static unsigned long get_ulong_value(const char *value) +{ + unsigned long result = 0; + char *endptr = NULL; + + errno = 0; + result = strtoul(value, &endptr, 10); + + if (errno || *endptr) + error("invalid unsigned value '%s'", value); + + return result; +} + +bool kabi_is_enumerator_ignored(const char *fqn, const char *field) +{ + bool match = false; + struct rule *rule; + char *target; + + if (!stable) + return false; + if (!fqn || !*fqn || !field || !*field) + return false; + + target = get_enumerator_target(fqn, field); + + hash_for_each_possible( + rules, rule, hash, + rule_values_hash(KABI_RULE_TYPE_ENUMERATOR_IGNORE, target)) { + if (rule->type == KABI_RULE_TYPE_ENUMERATOR_IGNORE && + !strcmp(target, rule->target)) { + match = true; + break; + } + } + + free(target); + return match; +} + +bool kabi_get_enumerator_value(const char *fqn, const char *field, + unsigned long *value) +{ + bool match = false; + struct rule *rule; + char *target; + + if (!stable) + return false; + if (!fqn || !*fqn || !field || !*field) + return false; + + target = get_enumerator_target(fqn, field); + + hash_for_each_possible(rules, rule, hash, + rule_values_hash(KABI_RULE_TYPE_ENUMERATOR_VALUE, + target)) { + if 
(rule->type == KABI_RULE_TYPE_ENUMERATOR_VALUE && + !strcmp(target, rule->target)) { + *value = get_ulong_value(rule->value); + match = true; + break; + } + } + + free(target); + return match; +} + +void kabi_free(void) +{ + struct hlist_node *tmp; + struct rule *rule; + + hash_for_each_safe(rules, rule, tmp, hash) { + free((void *)rule->target); + free((void *)rule->value); + free(rule); + } + + hash_init(rules); +} diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c new file mode 100644 index 000000000000..327f87389c34 --- /dev/null +++ b/scripts/gendwarfksyms/symbols.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include "gendwarfksyms.h" + +#define SYMBOL_HASH_BITS 12 + +/* struct symbol_addr -> struct symbol */ +static HASHTABLE_DEFINE(symbol_addrs, 1 << SYMBOL_HASH_BITS); +/* name -> struct symbol */ +static HASHTABLE_DEFINE(symbol_names, 1 << SYMBOL_HASH_BITS); + +static inline unsigned int symbol_addr_hash(const struct symbol_addr *addr) +{ + return hash_32(addr->section ^ addr_hash(addr->address)); +} + +static unsigned int __for_each_addr(struct symbol *sym, symbol_callback_t func, + void *data) +{ + struct hlist_node *tmp; + struct symbol *match = NULL; + unsigned int processed = 0; + + hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash, + symbol_addr_hash(&sym->addr)) { + if (match == sym) + continue; /* Already processed */ + + if (match->addr.section == sym->addr.section && + match->addr.address == sym->addr.address) { + func(match, data); + ++processed; + } + } + + return processed; +} + +/* + * For symbols without debugging information (e.g. symbols defined in other + * TUs), we also match __gendwarfksyms_ptr_<symbol_name> symbols, which the + * kernel uses to ensure type information is present in the TU that exports + * the symbol. A __gendwarfksyms_ptr pointer must have the same type as the + * exported symbol, e.g.: + * + * typeof(symname) *__gendwarfksyms_ptr_symname = &symname; + */ +bool is_symbol_ptr(const char *name) +{ + return name && !strncmp(name, SYMBOL_PTR_PREFIX, SYMBOL_PTR_PREFIX_LEN); +} + +static unsigned int for_each(const char *name, symbol_callback_t func, + void *data) +{ + struct hlist_node *tmp; + struct symbol *match; + + if (!name || !*name) + return 0; + if (is_symbol_ptr(name)) + name += SYMBOL_PTR_PREFIX_LEN; + + hash_for_each_possible_safe(symbol_names, match, tmp, name_hash, + hash_str(name)) { + if (strcmp(match->name, name)) + continue; + + /* Call func for the match, and all address matches */ + if (func) + func(match, data); + + if (match->addr.section != SHN_UNDEF) + return __for_each_addr(match, func, data) + 1; + + return 1; + } + + return 0; +} + +static void set_crc(struct symbol *sym, void *data) +{ + unsigned long *crc = data; + + if (sym->state == SYMBOL_PROCESSED && sym->crc != *crc) + warn("overriding version for symbol %s (crc %lx vs. 
%lx)", + sym->name, sym->crc, *crc); + + sym->state = SYMBOL_PROCESSED; + sym->crc = *crc; +} + +void symbol_set_crc(struct symbol *sym, unsigned long crc) +{ + if (for_each(sym->name, set_crc, &crc) == 0) + error("no matching symbols: '%s'", sym->name); +} + +static void set_ptr(struct symbol *sym, void *data) +{ + sym->ptr_die_addr = (uintptr_t)((Dwarf_Die *)data)->addr; +} + +void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr) +{ + if (for_each(sym->name, set_ptr, ptr) == 0) + error("no matching symbols: '%s'", sym->name); +} + +static void set_die(struct symbol *sym, void *data) +{ + sym->die_addr = (uintptr_t)((Dwarf_Die *)data)->addr; + sym->state = SYMBOL_MAPPED; +} + +void symbol_set_die(struct symbol *sym, Dwarf_Die *die) +{ + if (for_each(sym->name, set_die, die) == 0) + error("no matching symbols: '%s'", sym->name); +} + +static bool is_exported(const char *name) +{ + return for_each(name, NULL, NULL) > 0; +} + +void symbol_read_exports(FILE *file) +{ + struct symbol *sym; + char *line = NULL; + char *name = NULL; + size_t size = 0; + int nsym = 0; + + while (getline(&line, &size, file) > 0) { + if (sscanf(line, "%ms\n", &name) != 1) + error("malformed input line: %s", line); + + if (is_exported(name)) { + /* Ignore duplicates */ + free(name); + continue; + } + + sym = xcalloc(1, sizeof(struct symbol)); + sym->name = name; + sym->addr.section = SHN_UNDEF; + sym->state = SYMBOL_UNPROCESSED; + + hash_add(symbol_names, &sym->name_hash, hash_str(sym->name)); + ++nsym; + + debug("%s", sym->name); + } + + free(line); + debug("%d exported symbols", nsym); +} + +static void get_symbol(struct symbol *sym, void *arg) +{ + struct symbol **res = arg; + + if (sym->state == SYMBOL_UNPROCESSED) + *res = sym; +} + +struct symbol *symbol_get(const char *name) +{ + struct symbol *sym = NULL; + + for_each(name, get_symbol, &sym); + return sym; +} + +void symbol_for_each(symbol_callback_t func, void *arg) +{ + struct hlist_node *tmp; + struct symbol *sym; + + hash_for_each_safe(symbol_names, sym, tmp, name_hash) { + func(sym, arg); + } +} + +typedef void (*elf_symbol_callback_t)(const char *name, GElf_Sym *sym, + Elf32_Word xndx, void *arg); + +static void elf_for_each_global(int fd, elf_symbol_callback_t func, void *arg) +{ + size_t sym_size; + GElf_Shdr shdr_mem; + GElf_Shdr *shdr; + Elf_Data *xndx_data = NULL; + Elf_Scn *scn; + Elf *elf; + + if (elf_version(EV_CURRENT) != EV_CURRENT) + error("elf_version failed: %s", elf_errmsg(-1)); + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) + error("elf_begin failed: %s", elf_errmsg(-1)); + + scn = elf_nextscn(elf, NULL); + + while (scn) { + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + error("gelf_getshdr failed: %s", elf_errmsg(-1)); + + if (shdr->sh_type == SHT_SYMTAB_SHNDX) { + xndx_data = elf_getdata(scn, NULL); + if (!xndx_data) + error("elf_getdata failed: %s", elf_errmsg(-1)); + break; + } + + scn = elf_nextscn(elf, scn); + } + + sym_size = gelf_fsize(elf, ELF_T_SYM, 1, EV_CURRENT); + scn = elf_nextscn(elf, NULL); + + while (scn) { + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + error("gelf_getshdr failed: %s", elf_errmsg(-1)); + + if (shdr->sh_type == SHT_SYMTAB) { + unsigned int nsyms; + unsigned int n; + Elf_Data *data = elf_getdata(scn, NULL); + + if (!data) + error("elf_getdata failed: %s", elf_errmsg(-1)); + + if (shdr->sh_entsize != sym_size) + error("expected sh_entsize (%lu) to be %zu", + shdr->sh_entsize, sym_size); + + nsyms = shdr->sh_size / shdr->sh_entsize; + + for (n = 1; n < nsyms; ++n) { + const char 
*name = NULL; + Elf32_Word xndx = 0; + GElf_Sym sym_mem; + GElf_Sym *sym; + + sym = gelf_getsymshndx(data, xndx_data, n, + &sym_mem, &xndx); + if (!sym) + error("gelf_getsymshndx failed: %s", + elf_errmsg(-1)); + + if (GELF_ST_BIND(sym->st_info) == STB_LOCAL) + continue; + + if (sym->st_shndx != SHN_XINDEX) + xndx = sym->st_shndx; + + name = elf_strptr(elf, shdr->sh_link, + sym->st_name); + if (!name) + error("elf_strptr failed: %s", + elf_errmsg(-1)); + + /* Skip empty symbol names */ + if (*name) + func(name, sym, xndx, arg); + } + } + + scn = elf_nextscn(elf, scn); + } + + check(elf_end(elf)); +} + +static void set_symbol_addr(struct symbol *sym, void *arg) +{ + struct symbol_addr *addr = arg; + + if (sym->addr.section == SHN_UNDEF) { + sym->addr = *addr; + hash_add(symbol_addrs, &sym->addr_hash, + symbol_addr_hash(&sym->addr)); + + debug("%s -> { %u, %lx }", sym->name, sym->addr.section, + sym->addr.address); + } else if (sym->addr.section != addr->section || + sym->addr.address != addr->address) { + warn("multiple addresses for symbol %s?", sym->name); + } +} + +static void elf_set_symbol_addr(const char *name, GElf_Sym *sym, + Elf32_Word xndx, void *arg) +{ + struct symbol_addr addr = { .section = xndx, .address = sym->st_value }; + + /* Set addresses for exported symbols */ + if (addr.section != SHN_UNDEF) + for_each(name, set_symbol_addr, &addr); +} + +void symbol_read_symtab(int fd) +{ + elf_for_each_global(fd, elf_set_symbol_addr, NULL); +} + +void symbol_print_versions(void) +{ + struct hlist_node *tmp; + struct symbol *sym; + + hash_for_each_safe(symbol_names, sym, tmp, name_hash) { + if (sym->state != SYMBOL_PROCESSED) + warn("no information for symbol %s", sym->name); + + printf("#SYMVER %s 0x%08lx\n", sym->name, sym->crc); + } +} + +void symbol_free(void) +{ + struct hlist_node *tmp; + struct symbol *sym; + + hash_for_each_safe(symbol_names, sym, tmp, name_hash) { + free((void *)sym->name); + free(sym); + } + + hash_init(symbol_addrs); + hash_init(symbol_names); +} diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c new file mode 100644 index 000000000000..6c03265f4d10 --- /dev/null +++ b/scripts/gendwarfksyms/types.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#define _GNU_SOURCE +#include <inttypes.h> +#include <stdio.h> +#include <zlib.h> + +#include "gendwarfksyms.h" + +static struct cache expansion_cache; + +/* + * A simple linked list of shared or owned strings to avoid copying strings + * around when not necessary. + */ +struct type_list_entry { + const char *str; + void *owned; + struct list_head list; +}; + +static void type_list_free(struct list_head *list) +{ + struct type_list_entry *entry; + struct type_list_entry *tmp; + + list_for_each_entry_safe(entry, tmp, list, list) { + if (entry->owned) + free(entry->owned); + free(entry); + } + + INIT_LIST_HEAD(list); +} + +static int type_list_append(struct list_head *list, const char *s, void *owned) +{ + struct type_list_entry *entry; + + if (!s) + return 0; + + entry = xmalloc(sizeof(struct type_list_entry)); + entry->str = s; + entry->owned = owned; + list_add_tail(&entry->list, list); + + return strlen(entry->str); +} + +static void type_list_write(struct list_head *list, FILE *file) +{ + struct type_list_entry *entry; + + list_for_each_entry(entry, list, list) { + if (entry->str) + checkp(fputs(entry->str, file)); + } +} + +/* + * An expanded type string in symtypes format. 
+ */ +struct type_expansion { + char *name; + size_t len; + struct list_head expanded; + struct hlist_node hash; +}; + +static void type_expansion_init(struct type_expansion *type) +{ + type->name = NULL; + type->len = 0; + INIT_LIST_HEAD(&type->expanded); +} + +static inline void type_expansion_free(struct type_expansion *type) +{ + free(type->name); + type->name = NULL; + type->len = 0; + type_list_free(&type->expanded); +} + +static void type_expansion_append(struct type_expansion *type, const char *s, + void *owned) +{ + type->len += type_list_append(&type->expanded, s, owned); +} + +/* + * type_map -- the longest expansions for each type. + * + * const char *name -> struct type_expansion * + */ +#define TYPE_HASH_BITS 12 +static HASHTABLE_DEFINE(type_map, 1 << TYPE_HASH_BITS); + +static int type_map_get(const char *name, struct type_expansion **res) +{ + struct type_expansion *e; + + hash_for_each_possible(type_map, e, hash, hash_str(name)) { + if (!strcmp(name, e->name)) { + *res = e; + return 0; + } + } + + return -1; +} + +static void type_map_add(const char *name, struct type_expansion *type) +{ + struct type_expansion *e; + + if (type_map_get(name, &e)) { + e = xmalloc(sizeof(struct type_expansion)); + type_expansion_init(e); + e->name = xstrdup(name); + + hash_add(type_map, &e->hash, hash_str(e->name)); + + if (dump_types) + debug("adding %s", e->name); + } else { + /* Use the longest available expansion */ + if (type->len <= e->len) + return; + + type_list_free(&e->expanded); + + if (dump_types) + debug("replacing %s", e->name); + } + + /* Take ownership of type->expanded */ + list_replace_init(&type->expanded, &e->expanded); + e->len = type->len; + + if (dump_types) { + checkp(fputs(e->name, stderr)); + checkp(fputs(" ", stderr)); + type_list_write(&e->expanded, stderr); + checkp(fputs("\n", stderr)); + } +} + +static void type_map_write(FILE *file) +{ + struct type_expansion *e; + struct hlist_node *tmp; + + if (!file) + return; + + hash_for_each_safe(type_map, e, tmp, hash) { + checkp(fputs(e->name, file)); + checkp(fputs(" ", file)); + type_list_write(&e->expanded, file); + checkp(fputs("\n", file)); + } +} + +static void type_map_free(void) +{ + struct type_expansion *e; + struct hlist_node *tmp; + + hash_for_each_safe(type_map, e, tmp, hash) { + type_expansion_free(e); + free(e); + } + + hash_init(type_map); +} + +/* + * CRC for a type, with an optional fully expanded type string for + * debugging. + */ +struct version { + struct type_expansion type; + unsigned long crc; +}; + +static void version_init(struct version *version) +{ + version->crc = crc32(0, NULL, 0); + type_expansion_init(&version->type); +} + +static void version_free(struct version *version) +{ + type_expansion_free(&version->type); +} + +static void version_add(struct version *version, const char *s) +{ + version->crc = crc32(version->crc, (void *)s, strlen(s)); + if (dump_versions) + type_expansion_append(&version->type, s, NULL); +} + +/* + * Type reference format: <prefix>#<name>, where prefix: + * s -> structure + * u -> union + * e -> enum + * t -> typedef + * + * Names with spaces are additionally wrapped in single quotes. 
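As a quick illustration of the reference syntax described above (these lines are illustrative, not output captured from the tool):

    s#task_struct          reference to struct task_struct
    u#sigval               reference to union sigval
    e#pid_type             reference to enum pid_type
    t#atomic_t             reference to the typedef atomic_t
    s#'name with spaces'   quoted form used when the name contains a space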
+ */ +static inline bool is_type_prefix(const char *s) +{ + return (s[0] == 's' || s[0] == 'u' || s[0] == 'e' || s[0] == 't') && + s[1] == '#'; +} + +static char get_type_prefix(int tag) +{ + switch (tag) { + case DW_TAG_class_type: + case DW_TAG_structure_type: + return 's'; + case DW_TAG_union_type: + return 'u'; + case DW_TAG_enumeration_type: + return 'e'; + case DW_TAG_typedef_type: + return 't'; + default: + return 0; + } +} + +static char *get_type_name(struct die *cache) +{ + const char *quote; + char prefix; + char *name; + + if (cache->state == DIE_INCOMPLETE) { + warn("found incomplete cache entry: %p", cache); + return NULL; + } + if (cache->state == DIE_SYMBOL) + return NULL; + if (!cache->fqn || !*cache->fqn) + return NULL; + + prefix = get_type_prefix(cache->tag); + if (!prefix) + return NULL; + + /* Wrap names with spaces in single quotes */ + quote = strstr(cache->fqn, " ") ? "'" : ""; + + /* <prefix>#<type_name>\0 */ + if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn, quote) < 0) + error("asprintf failed for '%s'", cache->fqn); + + return name; +} + +static void __calculate_version(struct version *version, struct list_head *list) +{ + struct type_list_entry *entry; + struct type_expansion *e; + + /* Calculate a CRC over an expanded type string */ + list_for_each_entry(entry, list, list) { + if (is_type_prefix(entry->str)) { + check(type_map_get(entry->str, &e)); + + /* + * It's sufficient to expand each type reference just + * once to detect changes. + */ + if (cache_was_expanded(&expansion_cache, e)) { + version_add(version, entry->str); + } else { + cache_mark_expanded(&expansion_cache, e); + __calculate_version(version, &e->expanded); + } + } else { + version_add(version, entry->str); + } + } +} + +static void calculate_version(struct version *version, struct list_head *list) +{ + version_init(version); + __calculate_version(version, list); + cache_free(&expansion_cache); +} + +static void __type_expand(struct die *cache, struct type_expansion *type, + bool recursive); + +static void type_expand_child(struct die *cache, struct type_expansion *type, + bool recursive) +{ + struct type_expansion child; + char *name; + + name = get_type_name(cache); + if (!name) { + __type_expand(cache, type, recursive); + return; + } + + if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) { + __cache_mark_expanded(&expansion_cache, cache->addr); + type_expansion_init(&child); + __type_expand(cache, &child, true); + type_map_add(name, &child); + type_expansion_free(&child); + } + + type_expansion_append(type, name, name); +} + +static void __type_expand(struct die *cache, struct type_expansion *type, + bool recursive) +{ + struct die_fragment *df; + struct die *child; + + list_for_each_entry(df, &cache->fragments, list) { + switch (df->type) { + case FRAGMENT_STRING: + type_expansion_append(type, df->data.str, NULL); + break; + case FRAGMENT_DIE: + /* Use a complete die_map expansion if available */ + if (__die_map_get(df->data.addr, DIE_COMPLETE, + &child) && + __die_map_get(df->data.addr, DIE_UNEXPANDED, + &child)) + error("unknown child: %" PRIxPTR, + df->data.addr); + + type_expand_child(child, type, recursive); + break; + case FRAGMENT_LINEBREAK: + /* + * Keep whitespace in the symtypes format, but avoid + * repeated spaces. 
+ */ + if (list_is_last(&df->list, &cache->fragments) || + list_next_entry(df, list)->type != + FRAGMENT_LINEBREAK) + type_expansion_append(type, " ", NULL); + break; + default: + error("empty die_fragment in %p", cache); + } + } +} + +static void type_expand(struct die *cache, struct type_expansion *type, + bool recursive) +{ + type_expansion_init(type); + __type_expand(cache, type, recursive); + cache_free(&expansion_cache); +} + +static void expand_type(struct die *cache, void *arg) +{ + struct type_expansion type; + char *name; + + if (cache->mapped) + return; + + cache->mapped = true; + + /* + * Skip unexpanded die_map entries if there's a complete + * expansion available for this DIE. + */ + if (cache->state == DIE_UNEXPANDED && + !__die_map_get(cache->addr, DIE_COMPLETE, &cache)) { + if (cache->mapped) + return; + + cache->mapped = true; + } + + name = get_type_name(cache); + if (!name) + return; + + debug("%s", name); + type_expand(cache, &type, true); + type_map_add(name, &type); + + type_expansion_free(&type); + free(name); +} + +static void expand_symbol(struct symbol *sym, void *arg) +{ + struct type_expansion type; + struct version version; + struct die *cache; + + /* + * No need to expand again unless we want a symtypes file entry + * for the symbol. Note that this means `sym` has the same address + * as another symbol that was already processed. + */ + if (!symtypes && sym->state == SYMBOL_PROCESSED) + return; + + if (__die_map_get(sym->die_addr, DIE_SYMBOL, &cache)) + return; /* We'll warn about missing CRCs later. */ + + type_expand(cache, &type, false); + + /* If the symbol already has a version, don't calculate it again. */ + if (sym->state != SYMBOL_PROCESSED) { + calculate_version(&version, &type.expanded); + symbol_set_crc(sym, version.crc); + debug("%s = %lx", sym->name, version.crc); + + if (dump_versions) { + checkp(fputs(sym->name, stderr)); + checkp(fputs(" ", stderr)); + type_list_write(&version.type.expanded, stderr); + checkp(fputs("\n", stderr)); + } + + version_free(&version); + } + + /* These aren't needed in type_map unless we want a symtypes file. */ + if (symtypes) + type_map_add(sym->name, &type); + + type_expansion_free(&type); +} + +void generate_symtypes_and_versions(FILE *file) +{ + cache_init(&expansion_cache); + + /* + * die_map processing: + * + * 1. die_map contains all types referenced in exported symbol + * signatures, but can contain duplicates just like the original + * DWARF, and some references may not be fully expanded depending + * on how far we processed the DIE tree for that specific symbol. + * + * For each die_map entry, find the longest available expansion, + * and add it to type_map. + */ + die_map_for_each(expand_type, NULL); + + /* + * 2. For each exported symbol, expand the die_map type, and use + * type_map expansions to calculate a symbol version from the + * fully expanded type string. + */ + symbol_for_each(expand_symbol, NULL); + + /* + * 3. If a symtypes file is requested, write type_map contents to + * the file. + */ + type_map_write(file); + type_map_free(); +} diff --git a/scripts/genksyms/Makefile b/scripts/genksyms/Makefile index 312edccda736..4350311fb7b3 100644 --- a/scripts/genksyms/Makefile +++ b/scripts/genksyms/Makefile @@ -4,24 +4,6 @@ hostprogs-always-y += genksyms genksyms-objs := genksyms.o parse.tab.o lex.lex.o -# FIXME: fix the ambiguous grammar in parse.y and delete this hack -# -# Suppress shift/reduce, reduce/reduce conflicts warnings -# unless W=1 is specified. 
-# -# Just in case, run "$(YACC) --version" without suppressing stderr -# so that 'bison: not found' will be displayed if it is missing. -ifeq ($(findstring 1,$(KBUILD_EXTRA_WARN)),) - -quiet_cmd_bison_no_warn = $(quiet_cmd_bison) - cmd_bison_no_warn = $(YACC) --version >/dev/null; \ - $(cmd_bison) 2>/dev/null - -$(obj)/pars%.tab.c $(obj)/pars%.tab.h: $(src)/pars%.y FORCE - $(call if_changed,bison_no_warn) - -endif - # -I needed for generated C source to include headers in source tree HOSTCFLAGS_parse.tab.o := -I $(src) HOSTCFLAGS_lex.lex.o := -I $(src) diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c index 07f9b8cfb233..8b0d7ac73dbb 100644 --- a/scripts/genksyms/genksyms.c +++ b/scripts/genksyms/genksyms.c @@ -12,18 +12,19 @@ #include <stdio.h> #include <string.h> +#include <stdint.h> #include <stdlib.h> #include <unistd.h> #include <assert.h> #include <stdarg.h> #include <getopt.h> +#include <hashtable.h> + #include "genksyms.h" /*----------------------------------------------------------------------*/ -#define HASH_BUCKETS 4096 - -static struct symbol *symtab[HASH_BUCKETS]; +static HASHTABLE_DEFINE(symbol_hashtable, 1U << 12); static FILE *debugfile; int cur_line = 1; @@ -60,7 +61,7 @@ static void print_type_name(enum symbol_type type, const char *name); /*----------------------------------------------------------------------*/ -static const unsigned int crctab32[] = { +static const uint32_t crctab32[] = { 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, @@ -115,19 +116,19 @@ static const unsigned int crctab32[] = { 0x2d02ef8dU }; -static unsigned long partial_crc32_one(unsigned char c, unsigned long crc) +static uint32_t partial_crc32_one(uint8_t c, uint32_t crc) { return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8); } -static unsigned long partial_crc32(const char *s, unsigned long crc) +static uint32_t partial_crc32(const char *s, uint32_t crc) { while (*s) crc = partial_crc32_one(*s++, crc); return crc; } -static unsigned long crc32(const char *s) +static uint32_t crc32(const char *s) { return partial_crc32(s, 0xffffffff) ^ 0xffffffff; } @@ -151,14 +152,14 @@ static enum symbol_type map_to_ns(enum symbol_type t) struct symbol *find_symbol(const char *name, enum symbol_type ns, int exact) { - unsigned long h = crc32(name) % HASH_BUCKETS; struct symbol *sym; - for (sym = symtab[h]; sym; sym = sym->hash_next) + hash_for_each_possible(symbol_hashtable, sym, hnode, crc32(name)) { if (map_to_ns(sym->type) == map_to_ns(ns) && strcmp(name, sym->name) == 0 && sym->is_declared) break; + } if (exact && sym && sym->type != ns) return NULL; @@ -224,64 +225,56 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type, return NULL; } - h = crc32(name) % HASH_BUCKETS; - for (sym = symtab[h]; sym; sym = sym->hash_next) { - if (map_to_ns(sym->type) == map_to_ns(type) && - strcmp(name, sym->name) == 0) { - if (is_reference) - /* fall through */ ; - else if (sym->type == type && - equal_list(sym->defn, defn)) { - if (!sym->is_declared && sym->is_override) { - print_location(); - print_type_name(type, name); - fprintf(stderr, " modversion is " - "unchanged\n"); - } - sym->is_declared = 1; - return sym; - } else if (!sym->is_declared) { - if (sym->is_override && flag_preserve) { - print_location(); - fprintf(stderr, "ignoring "); - print_type_name(type, name); - fprintf(stderr, " modversion change\n"); - sym->is_declared = 
1; - return sym; - } else { - status = is_unknown_symbol(sym) ? - STATUS_DEFINED : STATUS_MODIFIED; - } - } else { - error_with_pos("redefinition of %s", name); - return sym; + h = crc32(name); + hash_for_each_possible(symbol_hashtable, sym, hnode, h) { + if (map_to_ns(sym->type) != map_to_ns(type) || + strcmp(name, sym->name)) + continue; + + if (is_reference) { + break; + } else if (sym->type == type && equal_list(sym->defn, defn)) { + if (!sym->is_declared && sym->is_override) { + print_location(); + print_type_name(type, name); + fprintf(stderr, " modversion is unchanged\n"); } + sym->is_declared = 1; + } else if (sym->is_declared) { + error_with_pos("redefinition of %s", name); + } else if (sym->is_override && flag_preserve) { + print_location(); + fprintf(stderr, "ignoring "); + print_type_name(type, name); + fprintf(stderr, " modversion change\n"); + sym->is_declared = 1; + } else { + status = is_unknown_symbol(sym) ? + STATUS_DEFINED : STATUS_MODIFIED; break; } + free_list(defn, NULL); + return sym; } if (sym) { - struct symbol **psym; + hash_del(&sym->hnode); - for (psym = &symtab[h]; *psym; psym = &(*psym)->hash_next) { - if (*psym == sym) { - *psym = sym->hash_next; - break; - } - } + free_list(sym->defn, NULL); + free(sym->name); + free(sym); --nsyms; } sym = xmalloc(sizeof(*sym)); - sym->name = name; + sym->name = xstrdup(name); sym->type = type; sym->defn = defn; sym->expansion_trail = NULL; sym->visited = NULL; sym->is_extern = is_extern; - sym->hash_next = symtab[h]; - symtab[h] = sym; + hash_add(symbol_hashtable, &sym->hnode, h); sym->is_declared = !is_reference; sym->status = status; @@ -480,7 +473,7 @@ static void read_reference(FILE *f) defn = def; def = read_node(f); } - subsym = add_reference_symbol(xstrdup(sym->string), sym->tag, + subsym = add_reference_symbol(sym->string, sym->tag, defn, is_extern); subsym->is_override = is_override; free_node(sym); @@ -525,7 +518,7 @@ static void print_list(FILE * f, struct string_list *list) } } -static unsigned long expand_and_crc_sym(struct symbol *sym, unsigned long crc) +static uint32_t expand_and_crc_sym(struct symbol *sym, uint32_t crc) { struct string_list *list = sym->defn; struct string_list **e, **b; @@ -632,7 +625,7 @@ static unsigned long expand_and_crc_sym(struct symbol *sym, unsigned long crc) void export_symbol(const char *name) { struct symbol *sym; - unsigned long crc; + uint32_t crc; int has_changed = 0; sym = find_symbol(name, SYM_NORMAL, 0); @@ -680,7 +673,7 @@ void export_symbol(const char *name) if (flag_dump_defs) fputs(">\n", debugfile); - printf("#SYMVER %s 0x%08lx\n", name, crc); + printf("#SYMVER %s 0x%08lx\n", name, (unsigned long)crc); } /*----------------------------------------------------------------------*/ @@ -832,9 +825,9 @@ int main(int argc, char **argv) } if (flag_debug) { - fprintf(debugfile, "Hash table occupancy %d/%d = %g\n", - nsyms, HASH_BUCKETS, - (double)nsyms / (double)HASH_BUCKETS); + fprintf(debugfile, "Hash table occupancy %d/%zd = %g\n", + nsyms, HASH_SIZE(symbol_hashtable), + (double)nsyms / HASH_SIZE(symbol_hashtable)); } if (dumpfile) diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h index 21ed2ec2d98c..0c355075f0e6 100644 --- a/scripts/genksyms/genksyms.h +++ b/scripts/genksyms/genksyms.h @@ -12,8 +12,11 @@ #ifndef MODUTILS_GENKSYMS_H #define MODUTILS_GENKSYMS_H 1 +#include <stdbool.h> #include <stdio.h> +#include <list_types.h> + enum symbol_type { SYM_NORMAL, SYM_TYPEDEF, SYM_ENUM, SYM_STRUCT, SYM_UNION, SYM_ENUM_CONST @@ -31,8 +34,8 @@ struct 
string_list { }; struct symbol { - struct symbol *hash_next; - const char *name; + struct hlist_node hnode; + char *name; enum symbol_type type; struct string_list *defn; struct symbol *expansion_trail; @@ -64,6 +67,8 @@ struct string_list *copy_list_range(struct string_list *start, int yylex(void); int yyparse(void); +extern bool dont_want_type_specifier; + void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2))); /*----------------------------------------------------------------------*/ diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l index a4d7495eaf75..22aeb57649d9 100644 --- a/scripts/genksyms/lex.l +++ b/scripts/genksyms/lex.l @@ -12,6 +12,7 @@ %{ #include <limits.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> #include <ctype.h> @@ -50,6 +51,7 @@ MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) %% +u?int(8|16|32|64)x(1|2|4|8|16)_t return BUILTIN_INT_KEYW; /* Keep track of our location in the original source files. */ ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME; @@ -113,6 +115,12 @@ MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) /* The second stage lexer. Here we incorporate knowledge of the state of the parser to tailor the tokens that are returned. */ +/* + * The lexer cannot distinguish whether a typedef'ed string is a TYPE or an + * IDENT. We need a hint from the parser to handle this accurately. + */ +bool dont_want_type_specifier; + int yylex(void) { @@ -207,7 +215,7 @@ repeat: goto repeat; } } - if (!suppress_type_lookup) + if (!suppress_type_lookup && !dont_want_type_specifier) { if (find_symbol(yytext, SYM_TYPEDEF, 1)) token = TYPE; @@ -431,7 +439,12 @@ fini: if (suppress_type_lookup > 0) --suppress_type_lookup; - if (dont_want_brace_phrase > 0) + + /* + * __attribute__() can be placed immediately after the 'struct' keyword. + * e.g.) struct __attribute__((__packed__)) foo { ... }; + */ + if (token != ATTRIBUTE_PHRASE && dont_want_brace_phrase > 0) --dont_want_brace_phrase; yylval = &next_node->next; diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y index 8e9b5e69e8f0..ee600a804fa1 100644 --- a/scripts/genksyms/parse.y +++ b/scripts/genksyms/parse.y @@ -12,6 +12,7 @@ %{ #include <assert.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> #include "genksyms.h" @@ -148,32 +149,45 @@ simple_declaration: current_name = NULL; } $$ = $3; + dont_want_type_specifier = false; } ; init_declarator_list_opt: - /* empty */ { $$ = NULL; } - | init_declarator_list + /* empty */ { $$ = NULL; } + | init_declarator_list { free_list(decl_spec, NULL); $$ = $1; } ; init_declarator_list: init_declarator { struct string_list *decl = *$1; *$1 = NULL; + + /* avoid sharing among multiple init_declarators */ + if (decl_spec) + decl_spec = copy_list_range(decl_spec, NULL); + add_symbol(current_name, is_typedef ? SYM_TYPEDEF : SYM_NORMAL, decl, is_extern); current_name = NULL; $$ = $1; + dont_want_type_specifier = true; } - | init_declarator_list ',' init_declarator - { struct string_list *decl = *$3; - *$3 = NULL; + | init_declarator_list ',' attribute_opt init_declarator + { struct string_list *decl = *$4; + *$4 = NULL; free_list(*$2, NULL); *$2 = decl_spec; + + /* avoid sharing among multiple init_declarators */ + if (decl_spec) + decl_spec = copy_list_range(decl_spec, NULL); + add_symbol(current_name, is_typedef ? 
SYM_TYPEDEF : SYM_NORMAL, decl, is_extern); current_name = NULL; - $$ = $3; + $$ = $4; + dont_want_type_specifier = true; } ; @@ -189,8 +203,9 @@ decl_specifier_seq_opt: ; decl_specifier_seq: - decl_specifier { decl_spec = *$1; } + attribute_opt decl_specifier { decl_spec = *$2; } | decl_specifier_seq decl_specifier { decl_spec = *$2; } + | decl_specifier_seq ATTRIBUTE_PHRASE { decl_spec = *$2; } ; decl_specifier: @@ -200,7 +215,8 @@ decl_specifier: remove_node($1); $$ = $1; } - | type_specifier + | type_specifier { dont_want_type_specifier = true; $$ = $1; } + | type_qualifier ; storage_class_specifier: @@ -213,24 +229,23 @@ storage_class_specifier: type_specifier: simple_type_specifier - | cvar_qualifier | TYPEOF_KEYW '(' parameter_declaration ')' | TYPEOF_PHRASE /* References to s/u/e's defined elsewhere. Rearrange things so that it is easier to expand the definition fully later. */ - | STRUCT_KEYW IDENT - { remove_node($1); (*$2)->tag = SYM_STRUCT; $$ = $2; } - | UNION_KEYW IDENT - { remove_node($1); (*$2)->tag = SYM_UNION; $$ = $2; } + | STRUCT_KEYW attribute_opt IDENT + { remove_node($1); (*$3)->tag = SYM_STRUCT; $$ = $3; } + | UNION_KEYW attribute_opt IDENT + { remove_node($1); (*$3)->tag = SYM_UNION; $$ = $3; } | ENUM_KEYW IDENT { remove_node($1); (*$2)->tag = SYM_ENUM; $$ = $2; } /* Full definitions of an s/u/e. Record it. */ - | STRUCT_KEYW IDENT class_body - { record_compound($1, $2, $3, SYM_STRUCT); $$ = $3; } - | UNION_KEYW IDENT class_body - { record_compound($1, $2, $3, SYM_UNION); $$ = $3; } + | STRUCT_KEYW attribute_opt IDENT class_body + { record_compound($1, $3, $4, SYM_STRUCT); $$ = $4; } + | UNION_KEYW attribute_opt IDENT class_body + { record_compound($1, $3, $4, SYM_UNION); $$ = $4; } | ENUM_KEYW IDENT enum_body { record_compound($1, $2, $3, SYM_ENUM); $$ = $3; } /* @@ -239,8 +254,8 @@ type_specifier: | ENUM_KEYW enum_body { add_symbol(NULL, SYM_ENUM, NULL, 0); $$ = $2; } /* Anonymous s/u definitions. Nothing needs doing. */ - | STRUCT_KEYW class_body { $$ = $2; } - | UNION_KEYW class_body { $$ = $2; } + | STRUCT_KEYW attribute_opt class_body { $$ = $3; } + | UNION_KEYW attribute_opt class_body { $$ = $3; } ; simple_type_specifier: @@ -260,22 +275,24 @@ simple_type_specifier: ; ptr_operator: - '*' cvar_qualifier_seq_opt + '*' type_qualifier_seq_opt { $$ = $2 ? 
$2 : $1; } ; -cvar_qualifier_seq_opt: +type_qualifier_seq_opt: /* empty */ { $$ = NULL; } - | cvar_qualifier_seq + | type_qualifier_seq ; -cvar_qualifier_seq: - cvar_qualifier - | cvar_qualifier_seq cvar_qualifier { $$ = $2; } +type_qualifier_seq: + type_qualifier + | ATTRIBUTE_PHRASE + | type_qualifier_seq type_qualifier { $$ = $2; } + | type_qualifier_seq ATTRIBUTE_PHRASE { $$ = $2; } ; -cvar_qualifier: - CONST_KEYW | VOLATILE_KEYW | ATTRIBUTE_PHRASE +type_qualifier: + CONST_KEYW | VOLATILE_KEYW | RESTRICT_KEYW { /* restrict has no effect in prototypes so ignore it */ remove_node($1); @@ -297,15 +314,7 @@ direct_declarator: current_name = (*$1)->string; $$ = $1; } - } - | TYPE - { if (current_name != NULL) { - error_with_pos("unexpected second declaration name"); - YYERROR; - } else { - current_name = (*$1)->string; - $$ = $1; - } + dont_want_type_specifier = false; } | direct_declarator '(' parameter_declaration_clause ')' { $$ = $4; } @@ -325,16 +334,19 @@ nested_declarator: ; direct_nested_declarator: - IDENT - | TYPE - | direct_nested_declarator '(' parameter_declaration_clause ')' + direct_nested_declarator1 + | direct_nested_declarator1 '(' parameter_declaration_clause ')' { $$ = $4; } - | direct_nested_declarator '(' error ')' + ; + +direct_nested_declarator1: + IDENT { $$ = $1; dont_want_type_specifier = false; } + | direct_nested_declarator1 '(' error ')' { $$ = $4; } - | direct_nested_declarator BRACKET_PHRASE + | direct_nested_declarator1 BRACKET_PHRASE { $$ = $2; } - | '(' nested_declarator ')' - { $$ = $3; } + | '(' attribute_opt nested_declarator ')' + { $$ = $4; } | '(' error ')' { $$ = $3; } ; @@ -352,45 +364,57 @@ parameter_declaration_list_opt: parameter_declaration_list: parameter_declaration + { $$ = $1; dont_want_type_specifier = false; } | parameter_declaration_list ',' parameter_declaration - { $$ = $3; } + { $$ = $3; dont_want_type_specifier = false; } ; parameter_declaration: - decl_specifier_seq m_abstract_declarator + decl_specifier_seq abstract_declarator_opt { $$ = $2 ? $2 : $1; } ; -m_abstract_declarator: - ptr_operator m_abstract_declarator +abstract_declarator_opt: + /* empty */ { $$ = NULL; } + | abstract_declarator + ; + +abstract_declarator: + ptr_operator + | ptr_operator abstract_declarator { $$ = $2 ? $2 : $1; } - | direct_m_abstract_declarator + | direct_abstract_declarator attribute_opt + { $$ = $2; dont_want_type_specifier = false; } ; -direct_m_abstract_declarator: - /* empty */ { $$ = NULL; } - | IDENT +direct_abstract_declarator: + direct_abstract_declarator1 + | direct_abstract_declarator1 open_paren parameter_declaration_clause ')' + { $$ = $4; } + | open_paren parameter_declaration_clause ')' + { $$ = $3; } + ; + +direct_abstract_declarator1: + IDENT { /* For version 2 checksums, we don't want to remember private parameter names. */ remove_node($1); $$ = $1; } - /* This wasn't really a typedef name but an identifier that - shadows one. 
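The dont_want_type_specifier hint exists because a declarator may reuse an identifier that elsewhere names a typedef, so the lexer alone cannot decide between TYPE and IDENT. A minimal sketch of that ambiguity, using hypothetical names:

    typedef unsigned long counter_t;

    struct stats {
    	int counter_t;		/* declarator position: an IDENT shadowing the typedef */
    	counter_t total;	/* type-specifier position: the same token is a TYPE */
    };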
*/ - | TYPE - { remove_node($1); - $$ = $1; - } - | direct_m_abstract_declarator '(' parameter_declaration_clause ')' - { $$ = $4; } - | direct_m_abstract_declarator '(' error ')' + | direct_abstract_declarator1 open_paren error ')' { $$ = $4; } - | direct_m_abstract_declarator BRACKET_PHRASE + | direct_abstract_declarator1 BRACKET_PHRASE { $$ = $2; } - | '(' m_abstract_declarator ')' - { $$ = $3; } - | '(' error ')' + | open_paren attribute_opt abstract_declarator ')' + { $$ = $4; } + | open_paren error ')' { $$ = $3; } + | BRACKET_PHRASE + ; + +open_paren: + '(' { $$ = $1; dont_want_type_specifier = false; } ; function_definition: @@ -430,9 +454,9 @@ member_specification: member_declaration: decl_specifier_seq_opt member_declarator_list_opt ';' - { $$ = $3; } + { $$ = $3; dont_want_type_specifier = false; } | error ';' - { $$ = $2; } + { $$ = $2; dont_want_type_specifier = false; } ; member_declarator_list_opt: @@ -442,7 +466,9 @@ member_declarator_list_opt: member_declarator_list: member_declarator - | member_declarator_list ',' member_declarator { $$ = $3; } + { $$ = $1; dont_want_type_specifier = true; } + | member_declarator_list ',' member_declarator + { $$ = $3; dont_want_type_specifier = true; } ; member_declarator: @@ -457,7 +483,7 @@ member_bitfield_declarator: attribute_opt: /* empty */ { $$ = NULL; } - | attribute_opt ATTRIBUTE_PHRASE + | attribute_opt ATTRIBUTE_PHRASE { $$ = $2; } ; enum_body: @@ -472,12 +498,12 @@ enumerator_list: enumerator: IDENT { - const char *name = strdup((*$1)->string); + const char *name = (*$1)->string; add_symbol(name, SYM_ENUM_CONST, NULL, 0); } | IDENT '=' EXPRESSION_PHRASE { - const char *name = strdup((*$1)->string); + const char *name = (*$1)->string; struct string_list *expr = copy_list_range(*$3, *$2); add_symbol(name, SYM_ENUM_CONST, expr, 0); } diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index a0a0be38cbdc..fb50bd4f4103 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -105,9 +105,11 @@ configfiles = $(wildcard $(srctree)/kernel/configs/$(1) $(srctree)/arch/$(SRCARC all-config-fragments = $(call configfiles,*.config) config-fragments = $(call configfiles,$@) +cmd_merge_fragments = $(srctree)/scripts/kconfig/merge_config.sh -m $(KCONFIG_CONFIG) $(config-fragments) + %.config: $(obj)/conf $(if $(config-fragments),, $(error $@ fragment does not exists on this architecture)) - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m $(KCONFIG_CONFIG) $(config-fragments) + $(call cmd,merge_fragments) $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig PHONY += tinyconfig diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 4286d5e7f95d..3b55e7a4131d 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -360,10 +360,12 @@ int conf_read_simple(const char *name, int def) *p = '\0'; - in = zconf_fopen(env); + name = env; + + in = zconf_fopen(name); if (in) { conf_message("using defaults found in %s", - env); + name); goto load; } diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 6c92ef1e16ef..eaa465b0ccf9 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1464,8 +1464,8 @@ void ConfigMainWindow::loadConfig(void) { QString str; - str = QFileDialog::getOpenFileName(this, "", configname); - if (str.isNull()) + str = QFileDialog::getOpenFileName(this, QString(), configname); + if (str.isEmpty()) return; if (conf_read(str.toLocal8Bit().constData())) @@ -1491,8 +1491,8 @@ void ConfigMainWindow::saveConfigAs(void) { 
QString str; - str = QFileDialog::getSaveFileName(this, "", configname); - if (str.isNull()) + str = QFileDialog::getSaveFileName(this, QString(), configname); + if (str.isEmpty()) return; if (conf_write(str.toLocal8Bit().constData())) { diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 89b84bf8e21f..7beb59dec5a0 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -388,6 +388,7 @@ static void sym_warn_unmet_dep(const struct symbol *sym) " Selected by [m]:\n"); fputs(str_get(&gs), stderr); + str_free(&gs); sym_warnings++; } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 7ea59dc4926b..e18ae7dc8140 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -33,6 +33,10 @@ static bool module_enabled; static bool modversions; /* Is CONFIG_MODULE_SRCVERSION_ALL set? */ static bool all_versions; +/* Is CONFIG_BASIC_MODVERSIONS set? */ +static bool basic_modversions; +/* Is CONFIG_EXTENDED_MODVERSIONS set? */ +static bool extended_modversions; /* If we are modposting external module set to 1 */ static bool external_module; /* Only warn about unresolved symbols */ @@ -1806,13 +1810,56 @@ static void add_exported_symbols(struct buffer *buf, struct module *mod) } /** + * Record CRCs for unresolved symbols, supporting long names + */ +static void add_extended_versions(struct buffer *b, struct module *mod) +{ + struct symbol *s; + + if (!extended_modversions) + return; + + buf_printf(b, "\n"); + buf_printf(b, "static const u32 ____version_ext_crcs[]\n"); + buf_printf(b, "__used __section(\"__version_ext_crcs\") = {\n"); + list_for_each_entry(s, &mod->unresolved_symbols, list) { + if (!s->module) + continue; + if (!s->crc_valid) { + warn("\"%s\" [%s.ko] has no CRC!\n", + s->name, mod->name); + continue; + } + buf_printf(b, "\t0x%08x,\n", s->crc); + } + buf_printf(b, "};\n"); + + buf_printf(b, "static const char ____version_ext_names[]\n"); + buf_printf(b, "__used __section(\"__version_ext_names\") =\n"); + list_for_each_entry(s, &mod->unresolved_symbols, list) { + if (!s->module) + continue; + if (!s->crc_valid) + /* + * We already warned on this when producing the crc + * table. + * We need to skip its name too, as the indexes in + * both tables need to align. 
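For a module importing two versioned symbols, the fragment that add_extended_versions() emits into the module's .mod.c would look roughly like this (symbol names and CRC values are made up):

    static const u32 ____version_ext_crcs[]
    __used __section("__version_ext_crcs") = {
    	0x1a2b3c4d,
    	0x5e6f7081,
    };
    static const char ____version_ext_names[]
    __used __section("__version_ext_names") =
    	"first_imported_symbol\0"
    	"second_imported_symbol\0"
    ;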
+ */ + continue; + buf_printf(b, "\t\"%s\\0\"\n", s->name); + } + buf_printf(b, ";\n"); +} + +/** * Record CRCs for unresolved symbols **/ static void add_versions(struct buffer *b, struct module *mod) { struct symbol *s; - if (!modversions) + if (!basic_modversions) return; buf_printf(b, "\n"); @@ -1828,11 +1875,16 @@ static void add_versions(struct buffer *b, struct module *mod) continue; } if (strlen(s->name) >= MODULE_NAME_LEN) { - error("too long symbol \"%s\" [%s.ko]\n", - s->name, mod->name); - break; + if (extended_modversions) { + /* this symbol will only be in the extended info */ + continue; + } else { + error("too long symbol \"%s\" [%s.ko]\n", + s->name, mod->name); + break; + } } - buf_printf(b, "\t{ %#8x, \"%s\" },\n", + buf_printf(b, "\t{ 0x%08x, \"%s\" },\n", s->crc, s->name); } @@ -1961,6 +2013,7 @@ static void write_mod_c_file(struct module *mod) add_header(&buf, mod); add_exported_symbols(&buf, mod); add_versions(&buf, mod); + add_extended_versions(&buf, mod); add_depends(&buf, mod); buf_printf(&buf, "\n"); @@ -2126,7 +2179,7 @@ int main(int argc, char **argv) LIST_HEAD(dump_lists); struct dump_list *dl, *dl2; - while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:")) != -1) { + while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:xb")) != -1) { switch (opt) { case 'e': external_module = true; @@ -2175,6 +2228,12 @@ int main(int argc, char **argv) case 'd': missing_namespace_deps = optarg; break; + case 'b': + basic_modversions = true; + break; + case 'x': + extended_modversions = true; + break; default: exit(1); } diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD index dca706617adc..0cf3a55b05e1 100644 --- a/scripts/package/PKGBUILD +++ b/scripts/package/PKGBUILD @@ -22,7 +22,6 @@ license=(GPL-2.0-only) makedepends=( bc bison - cpio flex gettext kmod diff --git a/scripts/package/builddeb b/scripts/package/builddeb index ad7aba0f268e..3627ca227e5a 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -5,10 +5,12 @@ # # Simple script to generate a deb package for a Linux kernel. All the # complexity of what to do with a kernel after it is installed or removed -# is left to other scripts and packages: they can install scripts in the -# /etc/kernel/{pre,post}{inst,rm}.d/ directories (or an alternative location -# specified in KDEB_HOOKDIR) that will be called on package install and -# removal. +# is left to other scripts and packages. Scripts can be placed into the +# preinst, postinst, prerm and postrm directories in /etc/kernel or +# /usr/share/kernel. A different list of search directories can be given +# via KDEB_HOOKDIR. Scripts in directories earlier in the list will +# override scripts of the same name in later directories. The script will +# be called on package installation and removal. set -eu @@ -74,10 +76,8 @@ install_maint_scripts () { # kernel packages, as well as kernel packages built using make-kpkg. # make-kpkg sets $INITRD to indicate whether an initramfs is wanted, and # so do we; recent versions of dracut and initramfs-tools will obey this. 
- debhookdir=${KDEB_HOOKDIR:-/etc/kernel} + debhookdir=${KDEB_HOOKDIR:-/etc/kernel /usr/share/kernel} for script in postinst postrm preinst prerm; do - mkdir -p "${pdir}${debhookdir}/${script}.d" - mkdir -p "${pdir}/DEBIAN" cat <<-EOF > "${pdir}/DEBIAN/${script}" #!/bin/sh @@ -90,7 +90,15 @@ install_maint_scripts () { # Tell initramfs builder whether it's wanted export INITRD=$(if_enabled_echo CONFIG_BLK_DEV_INITRD Yes No) - test -d ${debhookdir}/${script}.d && run-parts --arg="${KERNELRELEASE}" --arg="/${installed_image_path}" ${debhookdir}/${script}.d + # run-parts will error out if one of its directory arguments does not + # exist, so filter the list of hook directories accordingly. + hookdirs= + for dir in ${debhookdir}; do + test -d "\$dir/${script}.d" || continue + hookdirs="\$hookdirs \$dir/${script}.d" + done + hookdirs="\${hookdirs# }" + test -n "\$hookdirs" && run-parts --arg="${KERNELRELEASE}" --arg="/${installed_image_path}" \$hookdirs exit 0 EOF chmod 755 "${pdir}/DEBIAN/${script}" diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index d3c5b104c063..bb6e23c1174e 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -49,17 +49,10 @@ mkdir -p "${destdir}" # This caters to host programs that participate in Kbuild. objtool and # resolve_btfids are out of scope. if [ "${CC}" != "${HOSTCC}" ]; then - echo "Rebuilding host programs with ${CC}..." - - # This leverages external module building. - # - Clear sub_make_done to allow the top-level Makefile to redo sub-make. - # - Filter out --no-print-directory to print "Entering directory" logs - # when Make changes the working directory. - unset sub_make_done - MAKEFLAGS=$(echo "${MAKEFLAGS}" | sed s/--no-print-directory//) - - cat <<-'EOF' > "${destdir}/Kbuild" - subdir-y := scripts + cat "${destdir}/scripts/Makefile" - <<-'EOF' > "${destdir}/scripts/Kbuild" + subdir-y += basic + hostprogs-always-y += mod/modpost + mod/modpost-objs := $(addprefix mod/, modpost.o file2alias.o sumversion.o symsearch.o) EOF # HOSTCXX is not overridden. The C++ compiler is used to build: @@ -67,20 +60,12 @@ if [ "${CC}" != "${HOSTCC}" ]; then # - GCC plugins, which will not work on the installed system even after # being rebuilt. # - # Use the single-target build to avoid the modpost invocation, which - # would overwrite Module.symvers. - "${MAKE}" HOSTCC="${CC}" KBUILD_OUTPUT=. KBUILD_EXTMOD="${destdir}" scripts/ - - cat <<-'EOF' > "${destdir}/scripts/Kbuild" - subdir-y := basic - hostprogs-always-y := mod/modpost - mod/modpost-objs := $(addprefix mod/, modpost.o file2alias.o sumversion.o symsearch.o) - EOF - - # Run once again to rebuild scripts/basic/ and scripts/mod/modpost. - "${MAKE}" HOSTCC="${CC}" KBUILD_OUTPUT=. KBUILD_EXTMOD="${destdir}" scripts/ + # Clear VPATH and srcroot because the source files reside in the output + # directory. + # shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC=$(CC) VPATH= srcroot=. 
$(build)='"${destdir}"/scripts - rm -f "${destdir}/Kbuild" "${destdir}/scripts/Kbuild" + rm -f "${destdir}/scripts/Kbuild" fi find "${destdir}" \( -name '.*.cmd' -o -name '*.o' \) -delete diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index b038a1380b8a..b6dd98ca860b 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -205,7 +205,7 @@ Priority: optional Maintainer: $maintainer Rules-Requires-Root: no Build-Depends: debhelper-compat (= 12) -Build-Depends-Arch: bc, bison, cpio, flex, +Build-Depends-Arch: bc, bison, flex, gcc-${host_gnu} <!pkg.${sourcename}.nokernelheaders>, kmod, libelf-dev:native, libssl-dev:native, libssl-dev <!pkg.${sourcename}.nokernelheaders>, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 1edc12862a7d..9b6c2f157f83 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -2038,7 +2038,7 @@ static int apparmor_dointvec(const struct ctl_table *table, int write, return proc_dointvec(table, write, buffer, lenp, ppos); } -static struct ctl_table apparmor_sysctl_table[] = { +static const struct ctl_table apparmor_sysctl_table[] = { #ifdef CONFIG_USER_NS { .procname = "unprivileged_userns_apparmor_policy", diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c index 91f000eef3ad..cde08c478f32 100644 --- a/security/keys/sysctl.c +++ b/security/keys/sysctl.c @@ -9,7 +9,7 @@ #include <linux/sysctl.h> #include "internal.h" -static struct ctl_table key_sysctls[] = { +static const struct ctl_table key_sysctls[] = { { .procname = "maxkeys", .data = &key_quota_maxkeys, diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 1a2d02fee09b..1971710620c1 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -452,7 +452,7 @@ static int yama_dointvec_minmax(const struct ctl_table *table, int write, static int max_scope = YAMA_SCOPE_NO_ATTACH; -static struct ctl_table yama_sysctl_table[] = { +static const struct ctl_table yama_sysctl_table[] = { { .procname = "ptrace_scope", .data = &ptrace_scope, diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 8a3384342e8d..6c2b6a62d9d2 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3245,7 +3245,7 @@ static int snd_pcm_xfern_frames_ioctl(struct snd_pcm_substream *substream, if (copy_from_user(&xfern, _xfern, sizeof(xfern))) return -EFAULT; - bufs = memdup_user(xfern.bufs, sizeof(void *) * runtime->channels); + bufs = memdup_array_user(xfern.bufs, runtime->channels, sizeof(void *)); if (IS_ERR(bufs)) return PTR_ERR(bufs); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 84393f4f429d..8923813ce424 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -80,7 +80,11 @@ static int compare_input_type(const void *ap, const void *bp) /* In case one has boost and the other one has not, pick the one with boost first. 
*/ - return (int)(b->has_boost_on_pin - a->has_boost_on_pin); + if (a->has_boost_on_pin != b->has_boost_on_pin) + return (int)(b->has_boost_on_pin - a->has_boost_on_pin); + + /* Keep the original order */ + return a->order - b->order; } /* Reorder the surround channels @@ -400,6 +404,8 @@ int snd_hda_parse_pin_defcfg(struct hda_codec *codec, reorder_outputs(cfg->speaker_outs, cfg->speaker_pins); /* sort inputs in the order of AUTO_PIN_* type */ + for (i = 0; i < cfg->num_inputs; i++) + cfg->inputs[i].order = i; sort(cfg->inputs, cfg->num_inputs, sizeof(cfg->inputs[0]), compare_input_type, NULL); diff --git a/sound/pci/hda/hda_auto_parser.h b/sound/pci/hda/hda_auto_parser.h index 579b11beac71..87af3d8c02f7 100644 --- a/sound/pci/hda/hda_auto_parser.h +++ b/sound/pci/hda/hda_auto_parser.h @@ -37,6 +37,7 @@ struct auto_pin_cfg_item { unsigned int is_headset_mic:1; unsigned int is_headphone_mic:1; /* Mic-only in headphone jack */ unsigned int has_boost_on_pin:1; + int order; }; struct auto_pin_cfg; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d3c9ed963588..8192be394d0d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7497,6 +7497,16 @@ static void alc287_fixup_lenovo_thinkpad_with_alc1318(struct hda_codec *codec, spec->gen.pcm_playback_hook = alc287_alc1318_playback_pcm_hook; } +/* + * Clear COEF 0x0d (PCBEEP passthrough) bit 0x40 where BIOS sets it wrongly + * at PM resume + */ +static void alc283_fixup_dell_hp_resume(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_INIT) + alc_write_coef_idx(codec, 0xd, 0x2800); +} enum { ALC269_FIXUP_GPIO2, @@ -7799,6 +7809,7 @@ enum { ALC269_FIXUP_VAIO_VJFH52_MIC_NO_PRESENCE, ALC233_FIXUP_MEDION_MTL_SPK, ALC294_FIXUP_BASS_SPEAKER_15, + ALC283_FIXUP_DELL_HP_RESUME, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -10143,6 +10154,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc294_fixup_bass_speaker_15, }, + [ALC283_FIXUP_DELL_HP_RESUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc283_fixup_dell_hp_resume, + }, }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -10203,6 +10218,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05f4, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f5, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0604, "Dell Venue 11 Pro 7130", ALC283_FIXUP_DELL_HP_RESUME), SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x062c, "Dell Latitude E5550", ALC292_FIXUP_DELL_E7X), @@ -10918,7 +10934,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), HDA_CODEC_QUIRK(0x17aa, 0x386e, "Legion Y9000X 2022 IAH7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x386e, "Yoga Pro 7 14ARP8", ALC285_FIXUP_SPEAKER2_TO_DAC1), - HDA_CODEC_QUIRK(0x17aa, 0x386f, "Legion Pro 7 16ARX8H", ALC287_FIXUP_TAS2781_I2C), + HDA_CODEC_QUIRK(0x17aa, 0x38a8, "Legion Pro 7 16ARX8H", ALC287_FIXUP_TAS2781_I2C), /* this must match before PCI SSID 17aa:386f below */ SND_PCI_QUIRK(0x17aa, 0x386f, "Legion Pro 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", 
ALC287_FIXUP_YOGA7_14ARB7_I2C), SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2), diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index 1f59ee248771..89e99ed4275a 100644 --- a/sound/soc/amd/acp/acp-i2s.c +++ b/sound/soc/amd/acp/acp-i2s.c @@ -181,6 +181,7 @@ static int acp_i2s_set_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, u32 rx_mas break; default: dev_err(dev, "Unknown chip revision %d\n", chip->acp_rev); + spin_unlock_irq(&adata->acp_lock); return -EINVAL; } } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index ecf57a6cb7c3..b16587d8f97a 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -307,6 +307,34 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83L3"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83N6"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83Q2"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83Q3"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "UM5302TA"), } diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index ca4cc954efa8..eb97ac73ec06 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -2203,6 +2203,8 @@ static int da7213_i2c_probe(struct i2c_client *i2c) return ret; } + mutex_init(&da7213->ctrl_lock); + pm_runtime_set_autosuspend_delay(&i2c->dev, 100); pm_runtime_use_autosuspend(&i2c->dev); pm_runtime_set_active(&i2c->dev); diff --git a/sound/soc/codecs/es8316.c b/sound/soc/codecs/es8316.c index f508df01145b..e7bd561a8f40 100644 --- a/sound/soc/codecs/es8316.c +++ b/sound/soc/codecs/es8316.c @@ -101,7 +101,7 @@ static const struct snd_kcontrol_new es8316_snd_controls[] = { SOC_DOUBLE_R_TLV("DAC Playback Volume", ES8316_DAC_VOLL, ES8316_DAC_VOLR, 0, 0xc0, 1, dac_vol_tlv), SOC_SINGLE("DAC Soft Ramp Switch", ES8316_DAC_SET1, 4, 1, 1), - SOC_SINGLE("DAC Soft Ramp Rate", ES8316_DAC_SET1, 2, 4, 0), + SOC_SINGLE("DAC Soft Ramp Rate", ES8316_DAC_SET1, 2, 3, 0), SOC_SINGLE("DAC Notch Filter Switch", ES8316_DAC_SET2, 6, 1, 0), SOC_SINGLE("DAC Double Fs Switch", ES8316_DAC_SET2, 7, 1, 0), SOC_SINGLE("DAC Stereo Enhancement", ES8316_DAC_SET3, 0, 7, 0), diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index b06eead7e0f6..066d92b54312 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -911,7 +911,7 @@ static void es8326_jack_detect_handler(struct work_struct *work) regmap_write(es8326->regmap, ES8326_INT_SOURCE, (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON)); regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f); - regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x08); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x0d); queue_delayed_work(system_wq, &es8326->jack_detect_work, msecs_to_jiffies(400)); es8326->hp = 1; @@ -1023,7 +1023,7 @@ static void es8326_init(struct snd_soc_component *component) struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component); regmap_write(es8326->regmap, ES8326_RESET, 0x1f); - regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E); + 
regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x3E); regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0); usleep_range(10000, 15000); regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xd9); diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 2b3c0f9e178c..9cb74962161a 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -1091,8 +1091,7 @@ static int rt5514_set_bias_level(struct snd_soc_component *component, static int rt5514_probe(struct snd_soc_component *component) { struct rt5514_priv *rt5514 = snd_soc_component_get_drvdata(component); - struct platform_device *pdev = container_of(component->dev, - struct platform_device, dev); + struct platform_device *pdev = to_platform_device(component->dev); rt5514->mclk = devm_clk_get_optional(component->dev, "mclk"); if (IS_ERR(rt5514->mclk)) diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index e5fbf5305ea2..c4cf3cff58de 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -6,6 +6,7 @@ comment "Common SoC Audio options for Freescale CPUs:" config SND_SOC_FSL_ASRC tristate "Asynchronous Sample Rate Converter (ASRC) module support" depends on HAS_DMA + select DMA_SHARED_BUFFER select REGMAP_MMIO select SND_SOC_GENERIC_DMAENGINE_PCM select SND_COMPRESS_ACCEL diff --git a/sound/soc/fsl/fsl_asrc_m2m.c b/sound/soc/fsl/fsl_asrc_m2m.c index 4906843e2a8f..f46881f71e43 100644 --- a/sound/soc/fsl/fsl_asrc_m2m.c +++ b/sound/soc/fsl/fsl_asrc_m2m.c @@ -183,7 +183,7 @@ static int asrc_dmaconfig(struct fsl_asrc_pair *pair, } /* main function of converter */ -static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_task_runtime *task) +static int asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_task_runtime *task) { struct fsl_asrc *asrc = pair->asrc; struct device *dev = &asrc->pdev->dev; @@ -193,7 +193,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas unsigned int out_dma_len; unsigned int width; u32 fifo_addr; - int ret; + int ret = 0; /* set ratio mod */ if (asrc->m2m_set_ratio_mod) { @@ -215,6 +215,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas in_buf_len > ASRC_M2M_BUFFER_SIZE || in_buf_len % (width * pair->channels / 8)) { dev_err(dev, "out buffer size is error: [%d]\n", in_buf_len); + ret = -EINVAL; goto end; } @@ -245,6 +246,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas } } else if (out_dma_len > ASRC_M2M_BUFFER_SIZE) { dev_err(dev, "cap buffer size error\n"); + ret = -EINVAL; goto end; } @@ -263,12 +265,14 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas if (!wait_for_completion_interruptible_timeout(&pair->complete[IN], 10 * HZ)) { dev_err(dev, "out DMA task timeout\n"); + ret = -ETIMEDOUT; goto end; } if (out_dma_len > 0) { if (!wait_for_completion_interruptible_timeout(&pair->complete[OUT], 10 * HZ)) { dev_err(dev, "cap DMA task timeout\n"); + ret = -ETIMEDOUT; goto end; } } @@ -278,7 +282,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas /* update payload length for capture */ task->output_size = out_dma_len; end: - return; + return ret; } static int fsl_asrc_m2m_comp_open(struct snd_compr_stream *stream) @@ -525,9 +529,7 @@ static int fsl_asrc_m2m_comp_task_start(struct snd_compr_stream *stream, struct snd_compr_runtime *runtime = stream->runtime; struct fsl_asrc_pair *pair = runtime->private_data; - asrc_m2m_device_run(pair, task); - - return 0; + return 
asrc_m2m_device_run(pair, task); } static int fsl_asrc_m2m_comp_task_stop(struct snd_compr_stream *stream, @@ -633,7 +635,7 @@ int fsl_asrc_m2m_suspend(struct fsl_asrc *asrc) for (i = 0; i < PAIR_CTX_NUM; i++) { pair = asrc->pair[i]; - if (!pair) + if (!pair || !pair->dma_buffer[IN].area || !pair->dma_buffer[OUT].area) continue; if (!completion_done(&pair->complete[IN])) { if (pair->dma_chan[IN]) diff --git a/sound/soc/generic/audio-graph-card2.c b/sound/soc/generic/audio-graph-card2.c index c36b1a2ac949..ee94b256b770 100644 --- a/sound/soc/generic/audio-graph-card2.c +++ b/sound/soc/generic/audio-graph-card2.c @@ -648,23 +648,23 @@ multi_err: static int graph_parse_node_single(struct simple_util_priv *priv, enum graph_type gtype, - struct device_node *port, + struct device_node *ep, struct link_info *li, int is_cpu) { - struct device_node *ep __free(device_node) = of_graph_get_next_port_endpoint(port, NULL); - return __graph_parse_node(priv, gtype, ep, li, is_cpu, 0); } static int graph_parse_node(struct simple_util_priv *priv, enum graph_type gtype, - struct device_node *port, + struct device_node *ep, struct link_info *li, int is_cpu) { + struct device_node *port __free(device_node) = ep_to_port(ep); + if (graph_lnk_is_multi(port)) return graph_parse_node_multi(priv, gtype, port, li, is_cpu); else - return graph_parse_node_single(priv, gtype, port, li, is_cpu); + return graph_parse_node_single(priv, gtype, ep, li, is_cpu); } static void graph_parse_daifmt(struct device_node *node, unsigned int *daifmt) @@ -722,14 +722,15 @@ static unsigned int graph_parse_bitframe(struct device_node *ep) static void graph_link_init(struct simple_util_priv *priv, struct device_node *lnk, - struct device_node *port_cpu, - struct device_node *port_codec, + struct device_node *ep_cpu, + struct device_node *ep_codec, struct link_info *li, int is_cpu_node) { struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); - struct device_node *ep_cpu, *ep_codec; + struct device_node *port_cpu = ep_to_port(ep_cpu); + struct device_node *port_codec = ep_to_port(ep_codec); struct device_node *multi_cpu_port = NULL, *multi_codec_port = NULL; struct snd_soc_dai_link_component *dlc; unsigned int daifmt = 0; @@ -739,25 +740,23 @@ static void graph_link_init(struct simple_util_priv *priv, int multi_cpu_port_idx = 1, multi_codec_port_idx = 1; int i; - of_node_get(port_cpu); if (graph_lnk_is_multi(port_cpu)) { multi_cpu_port = port_cpu; ep_cpu = graph_get_next_multi_ep(&multi_cpu_port, multi_cpu_port_idx++); of_node_put(port_cpu); port_cpu = ep_to_port(ep_cpu); } else { - ep_cpu = of_graph_get_next_port_endpoint(port_cpu, NULL); + of_node_get(ep_cpu); } struct device_node *ports_cpu __free(device_node) = port_to_ports(port_cpu); - of_node_get(port_codec); if (graph_lnk_is_multi(port_codec)) { multi_codec_port = port_codec; ep_codec = graph_get_next_multi_ep(&multi_codec_port, multi_codec_port_idx++); of_node_put(port_codec); port_codec = ep_to_port(ep_codec); } else { - ep_codec = of_graph_get_next_port_endpoint(port_codec, NULL); + of_node_get(ep_codec); } struct device_node *ports_codec __free(device_node) = port_to_ports(port_codec); @@ -833,7 +832,7 @@ int audio_graph2_link_normal(struct simple_util_priv *priv, { struct device_node *cpu_port = lnk; struct device_node *cpu_ep __free(device_node) = of_graph_get_next_port_endpoint(cpu_port, NULL); - struct device_node *codec_port __free(device_node) = of_graph_get_remote_port(cpu_ep); + 
struct device_node *codec_ep __free(device_node) = of_graph_get_remote_endpoint(cpu_ep); int ret; /* @@ -841,18 +840,18 @@ int audio_graph2_link_normal(struct simple_util_priv *priv, * see * __graph_parse_node() :: DAI Naming */ - ret = graph_parse_node(priv, GRAPH_NORMAL, codec_port, li, 0); + ret = graph_parse_node(priv, GRAPH_NORMAL, codec_ep, li, 0); if (ret < 0) return ret; /* * call CPU, and set DAI Name */ - ret = graph_parse_node(priv, GRAPH_NORMAL, cpu_port, li, 1); + ret = graph_parse_node(priv, GRAPH_NORMAL, cpu_ep, li, 1); if (ret < 0) return ret; - graph_link_init(priv, lnk, cpu_port, codec_port, li, 1); + graph_link_init(priv, lnk, cpu_ep, codec_ep, li, 1); return ret; } @@ -864,15 +863,15 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv, { struct device_node *ep __free(device_node) = of_graph_get_next_port_endpoint(lnk, NULL); struct device_node *rep __free(device_node) = of_graph_get_remote_endpoint(ep); - struct device_node *cpu_port = NULL; - struct device_node *codec_port = NULL; + struct device_node *cpu_ep = NULL; + struct device_node *codec_ep = NULL; struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); int is_cpu = graph_util_is_ports0(lnk); int ret; if (is_cpu) { - cpu_port = of_graph_get_remote_port(ep); /* rport */ + cpu_ep = rep; /* * dpcm { @@ -901,12 +900,12 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv, dai_link->dynamic = 1; dai_link->dpcm_merged_format = 1; - ret = graph_parse_node(priv, GRAPH_DPCM, cpu_port, li, 1); + ret = graph_parse_node(priv, GRAPH_DPCM, cpu_ep, li, 1); if (ret) - goto err; + return ret; } else { - codec_port = of_graph_get_remote_port(ep); /* rport */ + codec_ep = rep; /* * dpcm { @@ -937,18 +936,15 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv, dai_link->no_pcm = 1; dai_link->be_hw_params_fixup = simple_util_be_hw_params_fixup; - ret = graph_parse_node(priv, GRAPH_DPCM, codec_port, li, 0); + ret = graph_parse_node(priv, GRAPH_DPCM, codec_ep, li, 0); if (ret < 0) - goto err; + return ret; } graph_parse_convert(ep, dai_props); /* at node of <dpcm> */ graph_parse_convert(rep, dai_props); /* at node of <CPU/Codec> */ - graph_link_init(priv, lnk, cpu_port, codec_port, li, is_cpu); -err: - of_node_put(cpu_port); - of_node_put(codec_port); + graph_link_init(priv, lnk, cpu_ep, codec_ep, li, is_cpu); return ret; } @@ -1013,26 +1009,26 @@ int audio_graph2_link_c2c(struct simple_util_priv *priv, struct device_node *ep0 __free(device_node) = of_graph_get_next_port_endpoint(port0, NULL); struct device_node *ep1 __free(device_node) = of_graph_get_next_port_endpoint(port1, NULL); - struct device_node *codec0_port __free(device_node) = of_graph_get_remote_port(ep0); - struct device_node *codec1_port __free(device_node) = of_graph_get_remote_port(ep1); + struct device_node *codec0_ep __free(device_node) = of_graph_get_remote_endpoint(ep0); + struct device_node *codec1_ep __free(device_node) = of_graph_get_remote_endpoint(ep1); /* * call Codec first. 
* see * __graph_parse_node() :: DAI Naming */ - ret = graph_parse_node(priv, GRAPH_C2C, codec1_port, li, 0); + ret = graph_parse_node(priv, GRAPH_C2C, codec1_ep, li, 0); if (ret < 0) return ret; /* * call CPU, and set DAI Name */ - ret = graph_parse_node(priv, GRAPH_C2C, codec0_port, li, 1); + ret = graph_parse_node(priv, GRAPH_C2C, codec0_ep, li, 1); if (ret < 0) return ret; - graph_link_init(priv, lnk, codec0_port, codec1_port, li, 1); + graph_link_init(priv, lnk, codec0_ep, codec1_ep, li, 1); return ret; } diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 9caa4407c1ca..6446cda0f857 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -1132,7 +1132,22 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, - { /* Vexia Edu Atla 10 tablet */ + { + /* Vexia Edu Atla 10 tablet 5V version */ + .matches = { + /* Having all 3 of these not set is somewhat unique */ + DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), + DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), + DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_JD_NOT_INV | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, + { /* Vexia Edu Atla 10 tablet 9V version */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), diff --git a/sound/soc/renesas/Kconfig b/sound/soc/renesas/Kconfig index 426632996a0a..cb01fb36355f 100644 --- a/sound/soc/renesas/Kconfig +++ b/sound/soc/renesas/Kconfig @@ -67,7 +67,7 @@ config SND_SH7760_AC97 config SND_SIU_MIGOR tristate "SIU sound support on Migo-R" - depends on SH_MIGOR && I2C + depends on SH_MIGOR && I2C && DMADEVICES select SND_SOC_SH4_SIU select SND_SOC_WM8978 help diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index bd0dc586e24a..7f5fcaecee4b 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -22,7 +22,6 @@ #define DRV_NAME "rockchip-i2s-tdm" -#define DEFAULT_MCLK_FS 256 #define CH_GRP_MAX 4 /* The max channel 8 / 2 */ #define MULTIPLEX_CH_MAX 10 @@ -70,6 +69,8 @@ struct rk_i2s_tdm_dev { bool has_playback; bool has_capture; struct snd_soc_dai_driver *dai; + unsigned int mclk_rx_freq; + unsigned int mclk_tx_freq; }; static int to_ch_num(unsigned int val) @@ -617,6 +618,27 @@ static int rockchip_i2s_trcm_mode(struct snd_pcm_substream *substream, return 0; } +static int rockchip_i2s_tdm_set_sysclk(struct snd_soc_dai *cpu_dai, int stream, + unsigned int freq, int dir) +{ + struct rk_i2s_tdm_dev *i2s_tdm = to_info(cpu_dai); + + if (i2s_tdm->clk_trcm) { + i2s_tdm->mclk_tx_freq = freq; + i2s_tdm->mclk_rx_freq = freq; + } else { + if (stream == SNDRV_PCM_STREAM_PLAYBACK) + i2s_tdm->mclk_tx_freq = freq; + else + i2s_tdm->mclk_rx_freq = freq; + } + + dev_dbg(i2s_tdm->dev, "The target mclk_%s freq is: %d\n", + stream ? 
"rx" : "tx", freq); + + return 0; +} + static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) @@ -631,15 +653,19 @@ static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream, if (i2s_tdm->clk_trcm == TRCM_TX) { mclk = i2s_tdm->mclk_tx; + mclk_rate = i2s_tdm->mclk_tx_freq; } else if (i2s_tdm->clk_trcm == TRCM_RX) { mclk = i2s_tdm->mclk_rx; + mclk_rate = i2s_tdm->mclk_rx_freq; } else if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { mclk = i2s_tdm->mclk_tx; + mclk_rate = i2s_tdm->mclk_tx_freq; } else { mclk = i2s_tdm->mclk_rx; + mclk_rate = i2s_tdm->mclk_rx_freq; } - err = clk_set_rate(mclk, DEFAULT_MCLK_FS * params_rate(params)); + err = clk_set_rate(mclk, mclk_rate); if (err) return err; @@ -799,6 +825,7 @@ static const struct snd_soc_dai_ops rockchip_i2s_tdm_dai_ops = { .hw_params = rockchip_i2s_tdm_hw_params, .set_bclk_ratio = rockchip_i2s_tdm_set_bclk_ratio, .set_fmt = rockchip_i2s_tdm_set_fmt, + .set_sysclk = rockchip_i2s_tdm_set_sysclk, .set_tdm_slot = rockchip_dai_tdm_slot, .trigger = rockchip_i2s_tdm_trigger, }; diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c index 0b85b29d1067..1e7bf00d7c46 100644 --- a/sound/soc/sof/imx/imx8.c +++ b/sound/soc/sof/imx/imx8.c @@ -175,8 +175,7 @@ static int imx8_run(struct snd_sof_dev *sdev) static int imx8_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct device_node *np = pdev->dev.of_node; struct device_node *res_node; struct resource *mmio; @@ -607,10 +606,31 @@ static struct snd_sof_of_mach sof_imx8_machs[] = { .drv_name = "asoc-audio-graph-card2", }, { + .compatible = "fsl,imx8qxp-mek-wcpu", + .sof_tplg_filename = "sof-imx8-wm8962.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + { .compatible = "fsl,imx8qm-mek", .sof_tplg_filename = "sof-imx8-wm8960.tplg", .drv_name = "asoc-audio-graph-card2", }, + { + .compatible = "fsl,imx8qm-mek-revd", + .sof_tplg_filename = "sof-imx8-wm8962.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + { + .compatible = "fsl,imx8qxp-mek-bb", + .sof_tplg_filename = "sof-imx8-cs42888.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + { + .compatible = "fsl,imx8qm-mek-bb", + .sof_tplg_filename = "sof-imx8-cs42888.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + {} }; diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c index ff42743efa79..3cabdebac558 100644 --- a/sound/soc/sof/imx/imx8m.c +++ b/sound/soc/sof/imx/imx8m.c @@ -144,8 +144,7 @@ static int imx8m_reset(struct snd_sof_dev *sdev) static int imx8m_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct device_node *np = pdev->dev.of_node; struct device_node *res_node; struct resource *mmio; @@ -295,6 +294,17 @@ static struct snd_soc_dai_driver imx8m_dai[] = { }, }, { + .name = "sai2", + .playback = { + .channels_min = 1, + .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, +{ .name = "sai3", .playback = { .channels_min = 1, @@ -306,6 +316,39 @@ static struct snd_soc_dai_driver imx8m_dai[] = { }, }, { + .name = "sai5", + .playback = { + .channels_min = 1, + .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, +{ + .name = "sai6", + .playback = { + .channels_min = 1, 
+ .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, +{ + .name = "sai7", + .playback = { + .channels_min = 1, + .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, +{ .name = "micfil", .capture = { .channels_min = 1, @@ -472,6 +515,11 @@ static const struct snd_sof_dsp_ops sof_imx8m_ops = { static struct snd_sof_of_mach sof_imx8mp_machs[] = { { + .compatible = "fsl,imx8mp-evk-revb4", + .sof_tplg_filename = "sof-imx8mp-wm8962.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + { .compatible = "fsl,imx8mp-evk", .sof_tplg_filename = "sof-imx8mp-wm8960.tplg", .drv_name = "asoc-audio-graph-card2", diff --git a/sound/soc/sof/imx/imx8ulp.c b/sound/soc/sof/imx/imx8ulp.c index 6965791ab6ef..0704da27e69d 100644 --- a/sound/soc/sof/imx/imx8ulp.c +++ b/sound/soc/sof/imx/imx8ulp.c @@ -155,8 +155,7 @@ static int imx8ulp_reset(struct snd_sof_dev *sdev) static int imx8ulp_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct device_node *np = pdev->dev.of_node; struct device_node *res_node; struct resource *mmio; diff --git a/sound/soc/sof/intel/bdw.c b/sound/soc/sof/intel/bdw.c index 5282c0071534..e1f0e38c2407 100644 --- a/sound/soc/sof/intel/bdw.c +++ b/sound/soc/sof/intel/bdw.c @@ -410,8 +410,7 @@ static int bdw_probe(struct snd_sof_dev *sdev) { struct snd_sof_pdata *pdata = sdev->pdata; const struct sof_dev_desc *desc = pdata->desc; - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); const struct sof_intel_dsp_desc *chip; struct resource *mmio; u32 base, size; diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c index 536d4c89d2f0..cae7dc0036c6 100644 --- a/sound/soc/sof/intel/byt.c +++ b/sound/soc/sof/intel/byt.c @@ -109,8 +109,7 @@ static int byt_acpi_probe(struct snd_sof_dev *sdev) { struct snd_sof_pdata *pdata = sdev->pdata; const struct sof_dev_desc *desc = pdata->desc; - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); const struct sof_intel_dsp_desc *chip; struct resource *mmio; u32 base, size; diff --git a/sound/soc/sof/mediatek/mt8186/mt8186.c b/sound/soc/sof/mediatek/mt8186/mt8186.c index 9955dfa520ae..31437fdd4e92 100644 --- a/sound/soc/sof/mediatek/mt8186/mt8186.c +++ b/sound/soc/sof/mediatek/mt8186/mt8186.c @@ -238,7 +238,7 @@ static int mt8186_run(struct snd_sof_dev *sdev) static int mt8186_dsp_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct adsp_priv *priv; int ret; diff --git a/sound/soc/sof/mediatek/mt8195/mt8195.c b/sound/soc/sof/mediatek/mt8195/mt8195.c index 6032b566c679..371563d7ce79 100644 --- a/sound/soc/sof/mediatek/mt8195/mt8195.c +++ b/sound/soc/sof/mediatek/mt8195/mt8195.c @@ -228,7 +228,7 @@ static int mt8195_run(struct snd_sof_dev *sdev) static int mt8195_dsp_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct adsp_priv *priv; int ret; @@ -341,7 +341,7 @@ static int mt8195_dsp_shutdown(struct snd_sof_dev *sdev) static void mt8195_dsp_remove(struct snd_sof_dev 
*sdev) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct adsp_priv *priv = sdev->pdata->hw_pdata; platform_device_unregister(priv->ipc_dev); @@ -351,7 +351,7 @@ static void mt8195_dsp_remove(struct snd_sof_dev *sdev) static int mt8195_dsp_suspend(struct snd_sof_dev *sdev, u32 target_state) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); int ret; u32 reset_sw, dbg_pc; diff --git a/sound/soc/sof/sof-client-ipc-flood-test.c b/sound/soc/sof/sof-client-ipc-flood-test.c index b35c98896968..11b6f7da2882 100644 --- a/sound/soc/sof/sof-client-ipc-flood-test.c +++ b/sound/soc/sof/sof-client-ipc-flood-test.c @@ -158,7 +158,6 @@ static ssize_t sof_ipc_flood_dfs_write(struct file *file, const char __user *buf unsigned long ipc_duration_ms = 0; bool flood_duration_test = false; unsigned long ipc_count = 0; - struct dentry *dentry; int err; char *string; int ret; @@ -182,14 +181,7 @@ static ssize_t sof_ipc_flood_dfs_write(struct file *file, const char __user *buf * ipc_duration_ms test floods the DSP for the time specified * in the debugfs entry. */ - dentry = file->f_path.dentry; - if (strcmp(dentry->d_name.name, DEBUGFS_IPC_FLOOD_COUNT) && - strcmp(dentry->d_name.name, DEBUGFS_IPC_FLOOD_DURATION)) { - ret = -EINVAL; - goto out; - } - - if (!strcmp(dentry->d_name.name, DEBUGFS_IPC_FLOOD_DURATION)) + if (debugfs_get_aux_num(file)) flood_duration_test = true; /* test completion criterion */ @@ -252,22 +244,15 @@ static ssize_t sof_ipc_flood_dfs_read(struct file *file, char __user *buffer, struct sof_ipc_flood_priv *priv = cdev->data; size_t size_ret; - struct dentry *dentry; + if (*ppos) + return 0; - dentry = file->f_path.dentry; - if (!strcmp(dentry->d_name.name, DEBUGFS_IPC_FLOOD_COUNT) || - !strcmp(dentry->d_name.name, DEBUGFS_IPC_FLOOD_DURATION)) { - if (*ppos) - return 0; + count = min_t(size_t, count, strlen(priv->buf)); + size_ret = copy_to_user(buffer, priv->buf, count); + if (size_ret) + return -EFAULT; - count = min_t(size_t, count, strlen(priv->buf)); - size_ret = copy_to_user(buffer, priv->buf, count); - if (size_ret) - return -EFAULT; - - *ppos += count; - return count; - } + *ppos += count; return count; } @@ -320,12 +305,12 @@ static int sof_ipc_flood_probe(struct auxiliary_device *auxdev, priv->dfs_root = debugfs_create_dir(dev_name(dev), debugfs_root); if (!IS_ERR_OR_NULL(priv->dfs_root)) { /* create read-write ipc_flood_count debugfs entry */ - debugfs_create_file(DEBUGFS_IPC_FLOOD_COUNT, 0644, priv->dfs_root, - cdev, &sof_ipc_flood_fops); + debugfs_create_file_aux_num(DEBUGFS_IPC_FLOOD_COUNT, 0644, + priv->dfs_root, cdev, 0, &sof_ipc_flood_fops); /* create read-write ipc_flood_duration_ms debugfs entry */ - debugfs_create_file(DEBUGFS_IPC_FLOOD_DURATION, 0644, - priv->dfs_root, cdev, &sof_ipc_flood_fops); + debugfs_create_file_aux_num(DEBUGFS_IPC_FLOOD_DURATION, 0644, + priv->dfs_root, cdev, 1, &sof_ipc_flood_fops); if (auxdev->id == 0) { /* diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 7968d6a2f592..a97efb7b131e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2343,6 +2343,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x2fc6, 0xf0b7, /* iBasso DC07 Pro */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), 
DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index e16cef160bc2..ce32cb35007d 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -95,7 +95,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, ynl_attr_for_each_payload(start, data_len, attr) { astart_off = (char *)attr - (char *)start; - aend_off = astart_off + ynl_attr_data_len(attr); + aend_off = (char *)ynl_attr_data_end(attr) - (char *)start; if (aend_off <= off) continue; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d7c7d29291fb..d466447ae928 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2107,8 +2107,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) return PTR_ERR(sc->tp_format); } + /* + * The tracepoint format contains __syscall_nr field, so it's one more + * than the actual number of syscall arguments. + */ if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? - RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) + RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1)) return -ENOMEM; sc->args = sc->tp_format->format.fields; diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh index 5a3b8a5a9b5c..8d1e6bbeac90 100755 --- a/tools/perf/tests/shell/trace_btf_enum.sh +++ b/tools/perf/tests/shell/trace_btf_enum.sh @@ -26,8 +26,12 @@ check_vmlinux() { trace_landlock() { echo "Tracing syscall ${syscall}" - # test flight just to see if landlock_add_rule and libbpf are available - $TESTPROG + # test flight just to see if landlock_add_rule is available + if ! perf trace $TESTPROG 2>&1 | grep -q landlock + then + echo "No landlock system call found, skipping to non-syscall tracing." + return + fi if perf trace -e $syscall $TESTPROG 2>&1 | \ grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*" diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0d2ea22bd9e4..31bb326b07a6 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2100,6 +2100,57 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, return 0; } +const char * const perf_disassembler__strs[] = { + [PERF_DISASM_UNKNOWN] = "unknown", + [PERF_DISASM_LLVM] = "llvm", + [PERF_DISASM_CAPSTONE] = "capstone", + [PERF_DISASM_OBJDUMP] = "objdump", +}; + + +static void annotation_options__add_disassembler(struct annotation_options *options, + enum perf_disassembler dis) +{ + for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers); i++) { + if (options->disassemblers[i] == dis) { + /* Disassembler is already present then don't add again. */ + return; + } + if (options->disassemblers[i] == PERF_DISASM_UNKNOWN) { + /* Found a free slot. */ + options->disassemblers[i] = dis; + return; + } + } + pr_err("Failed to add disassembler %d\n", dis); +} + +static int annotation_options__add_disassemblers_str(struct annotation_options *options, + const char *str) +{ + while (str && *str != '\0') { + const char *comma = strchr(str, ','); + int len = comma ? 
comma - str : (int)strlen(str); + bool match = false; + + for (u8 i = 0; i < ARRAY_SIZE(perf_disassembler__strs); i++) { + const char *dis_str = perf_disassembler__strs[i]; + + if (len == (int)strlen(dis_str) && !strncmp(str, dis_str, len)) { + annotation_options__add_disassembler(options, i); + match = true; + break; + } + } + if (!match) { + pr_err("Invalid disassembler '%.*s'\n", len, str); + return -1; + } + str = comma ? comma + 1 : NULL; + } + return 0; +} + static int annotation__config(const char *var, const char *value, void *data) { struct annotation_options *opt = data; @@ -2115,11 +2166,10 @@ static int annotation__config(const char *var, const char *value, void *data) else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL) opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; } else if (!strcmp(var, "annotate.disassemblers")) { - opt->disassemblers_str = strdup(value); - if (!opt->disassemblers_str) { - pr_err("Not enough memory for annotate.disassemblers\n"); - return -1; - } + int err = annotation_options__add_disassemblers_str(opt, value); + + if (err) + return err; } else if (!strcmp(var, "annotate.hide_src_code")) { opt->hide_src_code = perf_config_bool("hide_src_code", value); } else if (!strcmp(var, "annotate.jump_arrows")) { @@ -2185,9 +2235,25 @@ void annotation_options__exit(void) zfree(&annotate_opts.objdump_path); } +static void annotation_options__default_init_disassemblers(struct annotation_options *options) +{ + if (options->disassemblers[0] != PERF_DISASM_UNKNOWN) { + /* Already initialized. */ + return; + } +#ifdef HAVE_LIBLLVM_SUPPORT + annotation_options__add_disassembler(options, PERF_DISASM_LLVM); +#endif +#ifdef HAVE_LIBCAPSTONE_SUPPORT + annotation_options__add_disassembler(options, PERF_DISASM_CAPSTONE); +#endif + annotation_options__add_disassembler(options, PERF_DISASM_OBJDUMP); +} + void annotation_config__init(void) { perf_config(annotation__config, &annotate_opts); + annotation_options__default_init_disassemblers(&annotate_opts); } static unsigned int parse_percent_type(char *str1, char *str2) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0ba5846dad4d..98db1b88daf4 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -34,8 +34,13 @@ struct annotated_data_type; #define ANNOTATION__BR_CNTR_WIDTH 30 #define ANNOTATION_DUMMY_LEN 256 -// llvm, capstone, objdump -#define MAX_DISASSEMBLERS 3 +enum perf_disassembler { + PERF_DISASM_UNKNOWN = 0, + PERF_DISASM_LLVM, + PERF_DISASM_CAPSTONE, + PERF_DISASM_OBJDUMP, +}; +#define MAX_DISASSEMBLERS (PERF_DISASM_OBJDUMP + 1) struct annotation_options { bool hide_src_code, @@ -52,14 +57,12 @@ struct annotation_options { annotate_src, full_addr; u8 offset_level; - u8 nr_disassemblers; + u8 disassemblers[MAX_DISASSEMBLERS]; int min_pcnt; int max_lines; int context; char *objdump_path; char *disassembler_style; - const char *disassemblers_str; - const char *disassemblers[MAX_DISASSEMBLERS]; const char *prefix; const char *prefix_strip; unsigned int percent_type; @@ -134,6 +137,8 @@ struct disasm_line { struct annotation_line al; }; +extern const char * const perf_disassembler__strs[]; + void annotation_line__add(struct annotation_line *al, struct list_head *head); static inline double annotation_data__percent(struct annotation_data *data, diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 4a62ed593e84..e4352881e3fa 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ 
b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -431,9 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) { bool augmented, do_output = false; - int zero = 0, size, aug_size, index, - value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); + int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */ + s64 aug_size, size; unsigned int nr, *beauty_map; struct beauty_payload_enter *payload; void *arg, *payload_offset; @@ -484,14 +484,11 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) } else if (size > 0 && size <= value_size) { /* struct */ if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg)) augmented = true; - } else if (size < 0 && size >= -6) { /* buffer */ + } else if ((int)size < 0 && size >= -6) { /* buffer */ index = -(size + 1); barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick. index &= 7; // Satisfy the bounds checking with the verifier in some kernels. - aug_size = args->args[index]; - - if (aug_size > TRACE_AUG_MAX_BUF) - aug_size = TRACE_AUG_MAX_BUF; + aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index]; if (aug_size > 0) { if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg)) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 27094211edd8..5c329ad614e9 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -293,7 +293,7 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) die = cpu__get_die_id(cpu); /* There is no die_id on legacy system. */ - if (die == -1) + if (die < 0) die = 0; /* @@ -322,7 +322,7 @@ struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data) struct aggr_cpu_id id; /* There is no cluster_id on legacy system. 
*/ - if (cluster == -1) + if (cluster < 0) cluster = 0; id = aggr_cpu_id__die(cpu, data); diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index b7de4d9fd004..50c5c206b70e 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -2216,56 +2216,6 @@ out_free_command: return err; } -static int annotation_options__init_disassemblers(struct annotation_options *options) -{ - char *disassembler; - - if (options->disassemblers_str == NULL) { - const char *default_disassemblers_str = -#ifdef HAVE_LIBLLVM_SUPPORT - "llvm," -#endif -#ifdef HAVE_LIBCAPSTONE_SUPPORT - "capstone," -#endif - "objdump"; - - options->disassemblers_str = strdup(default_disassemblers_str); - if (!options->disassemblers_str) - goto out_enomem; - } - - disassembler = strdup(options->disassemblers_str); - if (disassembler == NULL) - goto out_enomem; - - while (1) { - char *comma = strchr(disassembler, ','); - - if (comma != NULL) - *comma = '\0'; - - options->disassemblers[options->nr_disassemblers++] = strim(disassembler); - - if (comma == NULL) - break; - - disassembler = comma + 1; - - if (options->nr_disassemblers >= MAX_DISASSEMBLERS) { - pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n", - MAX_DISASSEMBLERS, disassembler); - break; - } - } - - return 0; - -out_enomem: - pr_err("Not enough memory for annotate.disassemblers\n"); - return -1; -} - int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct annotation_options *options = args->options; @@ -2274,7 +2224,6 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) char symfs_filename[PATH_MAX]; bool delete_extract = false; struct kcore_extract kce; - const char *disassembler; bool decomp = false; int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); @@ -2334,28 +2283,26 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) } } - err = annotation_options__init_disassemblers(options); - if (err) - goto out_remove_tmp; - err = -1; + for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) { + enum perf_disassembler dis = options->disassemblers[i]; - for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) { - disassembler = options->disassemblers[i]; - - if (!strcmp(disassembler, "llvm")) + switch (dis) { + case PERF_DISASM_LLVM: err = symbol__disassemble_llvm(symfs_filename, sym, args); - else if (!strcmp(disassembler, "capstone")) + break; + case PERF_DISASM_CAPSTONE: err = symbol__disassemble_capstone(symfs_filename, sym, args); - else if (!strcmp(disassembler, "objdump")) + break; + case PERF_DISASM_OBJDUMP: err = symbol__disassemble_objdump(symfs_filename, sym, args); - else - pr_debug("Unknown disassembler %s, skipping...\n", disassembler); - } - - if (err == 0) { - pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n", - disassembler, options->disassemblers_str); + break; + case PERF_DISASM_UNKNOWN: /* End of disassemblers. 
*/ + default: + goto out_remove_tmp; + } + if (err == 0) + pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]); } out_remove_tmp: if (decomp) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index d0337c11f9ee..cc8948f49117 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -725,7 +725,7 @@ static void default_mock_decoder(struct cxl_decoder *cxld) cxld->reset = mock_decoder_reset; } -static int first_decoder(struct device *dev, void *data) +static int first_decoder(struct device *dev, const void *data) { struct cxl_decoder *cxld; diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 347c1e7b37bd..8d731bd63988 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -401,6 +401,10 @@ struct cxl_test_gen_media gen_media = { .channel = 1, .rank = 30, }, + .component_id = { 0x3, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d }, + .cme_threshold_ev_flags = 3, + .cme_count = { 33, 0, 0 }, + .sub_type = 0x2, }, }; @@ -429,6 +433,11 @@ struct cxl_test_dram dram = { .bank_group = 5, .bank = 2, .column = {0xDE, 0xAD}, + .component_id = { 0x1, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d }, + .sub_channel = 8, + .cme_threshold_ev_flags = 2, + .cvme_count = { 14, 0, 0 }, + .sub_type = 0x5, }, }; @@ -456,7 +465,10 @@ struct cxl_test_mem_module mem_module = { .dirty_shutdown_cnt = { 0xde, 0xad, 0xbe, 0xef }, .cor_vol_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, .cor_per_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, - } + }, + /* .validity_flags = <set below> */ + .component_id = { 0x2, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d }, + .event_sub_type = 0x3, }, }; @@ -478,13 +490,18 @@ static int mock_set_timestamp(struct cxl_dev_state *cxlds, static void cxl_mock_add_event_logs(struct mock_event_store *mes) { - put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK, + put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK | + CXL_GMER_VALID_COMPONENT | CXL_GMER_VALID_COMPONENT_ID_FORMAT, &gen_media.rec.media_hdr.validity_flags); put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP | - CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN, + CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN | CXL_DER_VALID_SUB_CHANNEL | + CXL_DER_VALID_COMPONENT | CXL_DER_VALID_COMPONENT_ID_FORMAT, &dram.rec.media_hdr.validity_flags); + put_unaligned_le16(CXL_MMER_VALID_COMPONENT | CXL_MMER_VALID_COMPONENT_ID_FORMAT, + &mem_module.rec.validity_flags); + mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed); mes_add_event(mes, CXL_EVENT_TYPE_INFO, (struct cxl_event_record_raw *)&gen_media); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 450c7566c33f..af2594e4f35d 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -228,16 +228,16 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL"); -int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, +int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info) { int rc = 0, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - if (ops && ops->is_mock_dev(dev)) + if (ops && ops->is_mock_dev(cxlds->dev)) rc = 0; else - rc = cxl_dvsec_rr_decode(dev, port, info); + rc = cxl_dvsec_rr_decode(cxlds, info); put_cxl_mock_ops(index); return rc; diff --git 
a/tools/testing/selftests/bpf/progs/find_vma.c b/tools/testing/selftests/bpf/progs/find_vma.c index 38034fb82530..02b82774469c 100644 --- a/tools/testing/selftests/bpf/progs/find_vma.c +++ b/tools/testing/selftests/bpf/progs/find_vma.c @@ -25,7 +25,7 @@ static long check_vma(struct task_struct *task, struct vm_area_struct *vma, { if (vma->vm_file) bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1, - vma->vm_file->f_path.dentry->d_iname); + vma->vm_file->f_path.dentry->d_shortname.string); /* check for VM_EXEC */ if (vma->vm_flags & VM_EXEC) diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 384cfa3d38a6..92c2f0376c08 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -142,7 +142,7 @@ function pre_ethtool { } function check_table { - local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 local -n expected=$2 local last=$3 @@ -212,7 +212,7 @@ function check_tables { } function print_table { - local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 read -a have < $path tree $NSIM_DEV_DFS/ @@ -641,7 +641,7 @@ for port in 0 1; do NSIM_NETDEV=`get_netdev_name old_netdevs` ip link set dev $NSIM_NETDEV up - echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error msg="1 - create VxLANs v6" exp0=( 0 0 0 0 ) @@ -663,7 +663,7 @@ for port in 0 1; do new_geneve gnv0 20000 msg="2 - destroy GENEVE" - echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error exp1=( `mke 20000 2` 0 0 0 ) del_dev gnv0 @@ -764,7 +764,7 @@ for port in 0 1; do msg="create VxLANs v4" new_vxlan vxlan0 10000 $NSIM_NETDEV - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="NIC device goes down" @@ -775,7 +775,7 @@ for port in 0 1; do fi check_tables - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="NIC device goes up again" @@ -789,7 +789,7 @@ for port in 0 1; do del_dev vxlan0 check_tables - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="destroy NIC" @@ -896,7 +896,7 @@ msg="vacate VxLAN in overflow table" exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) del_dev vxlan2 -echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset +echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="tunnels destroyed 2" diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c index 4d3f4525e1e1..55bce47e56b7 100644 --- a/tools/testing/selftests/exec/check-exec.c +++ b/tools/testing/selftests/exec/check-exec.c @@ -22,6 +22,7 @@ #include <sys/prctl.h> #include <sys/socket.h> #include <sys/stat.h> +#include <sys/syscall.h> #include <sys/sysmacros.h> #include <unistd.h> @@ -31,6 +32,12 @@ #include "../kselftest_harness.h" +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + static void drop_privileges(struct __test_metadata *const _metadata) { const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED; @@ -219,8 
+226,8 @@ static void test_exec_fd(struct __test_metadata *_metadata, const int fd, * test framework as an error. With AT_EXECVE_CHECK, we only check a * potential successful execution. */ - access_ret = - execveat(fd, "", argv, NULL, AT_EMPTY_PATH | AT_EXECVE_CHECK); + access_ret = sys_execveat(fd, "", argv, NULL, + AT_EMPTY_PATH | AT_EXECVE_CHECK); access_errno = errno; if (err_code) { EXPECT_EQ(-1, access_ret); diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh index 6fb66a687f17..bbc29ed9c60a 100755 --- a/tools/testing/selftests/gpio/gpio-sim.sh +++ b/tools/testing/selftests/gpio/gpio-sim.sh @@ -46,12 +46,6 @@ remove_chip() { rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip" } -configfs_cleanup() { - for CHIP in `ls $CONFIGFS_DIR/`; do - remove_chip $CHIP - done -} - create_chip() { local CHIP=$1 @@ -105,6 +99,13 @@ disable_chip() { echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip" } +configfs_cleanup() { + for CHIP in `ls $CONFIGFS_DIR/`; do + disable_chip $CHIP + remove_chip $CHIP + done +} + configfs_chip_name() { local CHIP=$1 local BANK=$2 @@ -181,6 +182,7 @@ create_chip chip create_bank chip bank enable_chip chip test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work" +disable_chip chip remove_chip chip echo "1.2. chip_name returns 'none' if the chip is still pending" @@ -195,6 +197,7 @@ create_chip chip create_bank chip bank enable_chip chip test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work" +disable_chip chip remove_chip chip echo "2. Creating and configuring simulated chips" @@ -204,6 +207,7 @@ create_chip chip create_bank chip bank enable_chip chip test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1" +disable_chip chip remove_chip chip echo "2.2. Number of lines can be specified" @@ -212,6 +216,7 @@ create_bank chip bank set_num_lines chip bank 16 enable_chip chip test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16" +disable_chip chip remove_chip chip echo "2.3. Label can be set" @@ -220,6 +225,7 @@ create_bank chip bank set_label chip bank foobar enable_chip chip test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect" +disable_chip chip remove_chip chip echo "2.4. Label can be left empty" @@ -227,6 +233,7 @@ create_chip chip create_bank chip bank enable_chip chip test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty" +disable_chip chip remove_chip chip echo "2.5. Line names can be configured" @@ -238,6 +245,7 @@ set_line_name chip bank 2 bar enable_chip chip test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect" test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect" +disable_chip chip remove_chip chip echo "2.6. Line config can remain unused if offset is greater than number of lines" @@ -248,6 +256,7 @@ set_line_name chip bank 5 foobar enable_chip chip test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect" test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect" +disable_chip chip remove_chip chip echo "2.7. 
Line configfs directory names are sanitized" @@ -267,6 +276,7 @@ for CHIP in $CHIPS; do enable_chip $CHIP done for CHIP in $CHIPS; do + disable_chip $CHIP remove_chip $CHIP done @@ -278,6 +288,7 @@ echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \ fail "Setting label of a live chip should fail" echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \ fail "Setting number of lines of a live chip should fail" +disable_chip chip remove_chip chip echo "2.10. Can't create line items when chip is live" @@ -285,6 +296,7 @@ create_chip chip create_bank chip bank enable_chip chip mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail" +disable_chip chip remove_chip chip echo "2.11. Probe errors are propagated to user-space" @@ -316,6 +328,7 @@ mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog enable_chip chip $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \ fail "Setting the value of a hogged line shouldn't succeed" +disable_chip chip remove_chip chip echo "3. Controlling simulated chips" @@ -331,6 +344,7 @@ test "$?" = "1" || fail "pull set incorrectly" sysfs_set_pull chip bank 0 pull-down $BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1 test "$?" = "0" || fail "pull set incorrectly" +disable_chip chip remove_chip chip echo "3.2. Pull can be read from sysfs" @@ -344,6 +358,7 @@ SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed" sysfs_set_pull chip bank 0 pull-up test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed" +disable_chip chip remove_chip chip echo "3.3. Incorrect input in sysfs is rejected" @@ -355,6 +370,7 @@ DEVNAME=`configfs_dev_name chip` CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull" echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected" +disable_chip chip remove_chip chip echo "3.4. Can't write to value" @@ -365,6 +381,7 @@ DEVNAME=`configfs_dev_name chip` CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly" +disable_chip chip remove_chip chip echo "4. Simulated GPIO chips are functional" @@ -382,6 +399,7 @@ $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 & sleep 0.1 # FIXME Any better way? test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs" kill $! +disable_chip chip remove_chip chip echo "4.2. 
Bias settings work correctly" @@ -394,6 +412,7 @@ CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" $BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0 test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work" +disable_chip chip remove_chip chip echo "GPIO $MODULE test PASS" diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 2af86bd796ba..aa6f2c1cbec7 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -59,6 +59,12 @@ int open_tree(int dfd, const char *filename, unsigned int flags) } #endif +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + #ifndef RENAME_EXCHANGE #define RENAME_EXCHANGE (1 << 1) #endif @@ -2024,8 +2030,8 @@ static void test_check_exec(struct __test_metadata *const _metadata, int ret; char *const argv[] = { (char *)path, NULL }; - ret = execveat(AT_FDCWD, path, argv, NULL, - AT_EMPTY_PATH | AT_EXECVE_CHECK); + ret = sys_execveat(AT_FDCWD, path, argv, NULL, + AT_EMPTY_PATH | AT_EXECVE_CHECK); if (err) { EXPECT_EQ(-1, ret); EXPECT_EQ(errno, err); diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index d10f420e4ef6..fd0d959914e4 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -215,12 +215,14 @@ def bpftool_map_list_wait(expected=0, n_retry=20, ns=""): raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None, - fail=True, include_stderr=False): + fail=True, include_stderr=False, dev_bind=None): args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name) if prog_type is not None: args += " type " + prog_type if dev is not None: args += " dev " + dev + elif dev_bind is not None: + args += " xdpmeta_dev " + dev_bind if len(maps): args += " map " + " map ".join(maps) @@ -980,6 +982,16 @@ try: rm("/sys/fs/bpf/offload") sim.wait_for_flush() + bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/devbound", + dev_bind=sim['ifname']) + devbound = bpf_pinned("/sys/fs/bpf/devbound") + start_test("Test dev-bound program in generic mode...") + ret, _, err = sim.set_xdp(devbound, "generic", fail=False, include_stderr=True) + fail(ret == 0, "devbound program in generic mode allowed") + check_extack(err, "Can't attach device-bound programs in generic mode.", args) + rm("/sys/fs/bpf/devbound") + sim.wait_for_flush() + start_test("Test XDP load failure...") sim.dfs["dev/bpf_bind_verifier_accept"] = 0 ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload", diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index 18b9443454a9..bc6b6762baf3 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../../ diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 8e3fc05a5397..c76525fe2b84 100644 --- 
a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -2,7 +2,7 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile index 2f1508abc826..3fd1da2ec07d 100644 --- a/tools/testing/selftests/net/openvswitch/Makefile +++ b/tools/testing/selftests/net/openvswitch/Makefile @@ -2,7 +2,7 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := openvswitch.sh diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index e15c43b7359b..ef8b25a606d8 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -39,11 +39,13 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then # xfail tests that are known flaky with dbg config, not fixable. # still run them for coverage (and expect 100% pass without dbg). declare -ar xfail_list=( + "tcp_eor_no-coalesce-retrans.pkt" "tcp_fast_recovery_prr-ss.*.pkt" + "tcp_slow_start_slow-start-after-win-update.pkt" "tcp_timestamping.*.pkt" "tcp_user_timeout_user-timeout-probe.pkt" "tcp_zerocopy_epoll_.*.pkt" - "tcp_tcp_info_tcp-info-*-limited.pkt" + "tcp_tcp_info_tcp-info-.*-limited.pkt" ) readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$" [[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore index 9ae7964491d5..7d9c87cd0649 100644 --- a/tools/testing/selftests/riscv/vector/.gitignore +++ b/tools/testing/selftests/riscv/vector/.gitignore @@ -1,3 +1,4 @@ vstate_exec_nolibc vstate_prctl -v_initval_nolibc +v_initval +v_exec_initval_nolibc diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile index bfff0ff4f3be..6f7497f4e7b3 100644 --- a/tools/testing/selftests/riscv/vector/Makefile +++ b/tools/testing/selftests/riscv/vector/Makefile @@ -2,18 +2,27 @@ # Copyright (C) 2021 ARM Limited # Originally tools/testing/arm64/abi/Makefile -TEST_GEN_PROGS := vstate_prctl v_initval_nolibc -TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc +TEST_GEN_PROGS := v_initval vstate_prctl +TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc v_exec_initval_nolibc include ../../lib.mk -$(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S +$(OUTPUT)/sys_hwprobe.o: ../hwprobe/sys_hwprobe.S + $(CC) -static -c -o$@ $(CFLAGS) $^ + +$(OUTPUT)/v_helpers.o: v_helpers.c + $(CC) -static -c -o$@ $(CFLAGS) $^ + +$(OUTPUT)/vstate_prctl: vstate_prctl.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ $(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc -$(OUTPUT)/v_initval_nolibc: v_initval_nolibc.c +$(OUTPUT)/v_initval: v_initval.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ + +$(OUTPUT)/v_exec_initval_nolibc: 
v_exec_initval_nolibc.c $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c new file mode 100644 index 000000000000..35c0812e32de --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Get values of vector registers as soon as the program starts to test if + * they are properly cleaned before starting a new program. Vector + * registers are caller saved, so no function calls may happen before reading + * the values. To further ensure consistency, this file is compiled without + * libc and without auto-vectorization. + * + * To be "clean" all values must be either all ones or all zeroes. + */ + +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +int main(int argc, char **argv) +{ + char prev_value = 0, value; + unsigned long vl; + int first = 1; + + if (argc > 2 && strcmp(argv[2], "x")) + asm volatile ( + // 0 | zimm[10:0] | rs1 | 1 1 1 | rd |1010111| vsetvli + // vsetvli t4, x0, e8, m1, d1 + ".4byte 0b00000000000000000111111011010111\n\t" + "mv %[vl], t4\n\t" + : [vl] "=r" (vl) : : "t4" + ); + else + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvli %[vl], x0, e8, m1, ta, ma\n\t" + ".option pop\n\t" + : [vl] "=r" (vl) + ); + +#define CHECK_VECTOR_REGISTER(register) ({ \ + for (int i = 0; i < vl; i++) { \ + asm volatile ( \ + ".option push\n\t" \ + ".option arch, +v\n\t" \ + "vmv.x.s %0, " __stringify(register) "\n\t" \ + "vsrl.vi " __stringify(register) ", " __stringify(register) ", 8\n\t" \ + ".option pop\n\t" \ + : "=r" (value)); \ + if (first) { \ + first = 0; \ + } else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \ + printf("Register " __stringify(register) \ + " values not clean! 
value: %u\n", value); \ + exit(-1); \ + } \ + prev_value = value; \ + } \ +}) + + CHECK_VECTOR_REGISTER(v0); + CHECK_VECTOR_REGISTER(v1); + CHECK_VECTOR_REGISTER(v2); + CHECK_VECTOR_REGISTER(v3); + CHECK_VECTOR_REGISTER(v4); + CHECK_VECTOR_REGISTER(v5); + CHECK_VECTOR_REGISTER(v6); + CHECK_VECTOR_REGISTER(v7); + CHECK_VECTOR_REGISTER(v8); + CHECK_VECTOR_REGISTER(v9); + CHECK_VECTOR_REGISTER(v10); + CHECK_VECTOR_REGISTER(v11); + CHECK_VECTOR_REGISTER(v12); + CHECK_VECTOR_REGISTER(v13); + CHECK_VECTOR_REGISTER(v14); + CHECK_VECTOR_REGISTER(v15); + CHECK_VECTOR_REGISTER(v16); + CHECK_VECTOR_REGISTER(v17); + CHECK_VECTOR_REGISTER(v18); + CHECK_VECTOR_REGISTER(v19); + CHECK_VECTOR_REGISTER(v20); + CHECK_VECTOR_REGISTER(v21); + CHECK_VECTOR_REGISTER(v22); + CHECK_VECTOR_REGISTER(v23); + CHECK_VECTOR_REGISTER(v24); + CHECK_VECTOR_REGISTER(v25); + CHECK_VECTOR_REGISTER(v26); + CHECK_VECTOR_REGISTER(v27); + CHECK_VECTOR_REGISTER(v28); + CHECK_VECTOR_REGISTER(v29); + CHECK_VECTOR_REGISTER(v30); + CHECK_VECTOR_REGISTER(v31); + +#undef CHECK_VECTOR_REGISTER + + return 0; +} diff --git a/tools/testing/selftests/riscv/vector/v_helpers.c b/tools/testing/selftests/riscv/vector/v_helpers.c new file mode 100644 index 000000000000..01a8799dcb78 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_helpers.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "../hwprobe/hwprobe.h" +#include <asm/vendor/thead.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/wait.h> + +bool is_xtheadvector_supported(void) +{ + struct riscv_hwprobe pair; + + pair.key = RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0; + riscv_hwprobe(&pair, 1, 0, NULL, 0); + return pair.value & RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR; +} + +bool is_vector_supported(void) +{ + struct riscv_hwprobe pair; + + pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; + riscv_hwprobe(&pair, 1, 0, NULL, 0); + return pair.value & RISCV_HWPROBE_EXT_ZVE32X; +} + +int launch_test(char *next_program, int test_inherit, int xtheadvector) +{ + char *exec_argv[4], *exec_envp[1]; + int rc, pid, status; + + pid = fork(); + if (pid < 0) { + printf("fork failed %d", pid); + return -1; + } + + if (!pid) { + exec_argv[0] = next_program; + exec_argv[1] = test_inherit != 0 ? "x" : NULL; + exec_argv[2] = xtheadvector != 0 ? 
"x" : NULL; + exec_argv[3] = NULL; + exec_envp[0] = NULL; + /* launch the program again to check inherit */ + rc = execve(next_program, exec_argv, exec_envp); + if (rc) { + perror("execve"); + printf("child execve failed %d\n", rc); + exit(-1); + } + } + + rc = waitpid(-1, &status, 0); + if (rc < 0) { + printf("waitpid failed\n"); + return -3; + } + + if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) || + WIFSIGNALED(status)) { + printf("child exited abnormally\n"); + return -4; + } + + return WEXITSTATUS(status); +} diff --git a/tools/testing/selftests/riscv/vector/v_helpers.h b/tools/testing/selftests/riscv/vector/v_helpers.h new file mode 100644 index 000000000000..763cddfe26da --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_helpers.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <stdbool.h> + +bool is_xtheadvector_supported(void); + +bool is_vector_supported(void); + +int launch_test(char *next_program, int test_inherit, int xtheadvector); diff --git a/tools/testing/selftests/riscv/vector/v_initval.c b/tools/testing/selftests/riscv/vector/v_initval.c new file mode 100644 index 000000000000..be9e1d18ad29 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_initval.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "../../kselftest_harness.h" +#include "v_helpers.h" + +#define NEXT_PROGRAM "./v_exec_initval_nolibc" + +TEST(v_initval) +{ + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + + ASSERT_EQ(0, launch_test(NEXT_PROGRAM, 0, xtheadvector)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c deleted file mode 100644 index 6174ffe016dc..000000000000 --- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include "../../kselftest.h" -#define MAX_VSIZE (8192 * 32) - -void dump(char *ptr, int size) -{ - int i = 0; - - for (i = 0; i < size; i++) { - if (i != 0) { - if (i % 16 == 0) - printf("\n"); - else if (i % 8 == 0) - printf(" "); - } - printf("%02x ", ptr[i]); - } - printf("\n"); -} - -int main(void) -{ - int i; - unsigned long vl; - char *datap, *tmp; - - ksft_set_plan(1); - - datap = malloc(MAX_VSIZE); - if (!datap) { - ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE); - exit(-1); - } - - tmp = datap; - asm volatile ( - ".option push\n\t" - ".option arch, +v\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vse8.v v0, (%2)\n\t" - "add %1, %2, %0\n\t" - "vse8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory"); - - ksft_print_msg("vl = %lu\n", vl); - - if (datap[0] != 0x00 && datap[0] != 0xff) { - ksft_test_result_fail("v-regesters are not properly initialized\n"); - dump(datap, vl * 4); - exit(-1); - } - - for (i = 1; i < vl * 4; i++) { - if (datap[i] != datap[0]) { - ksft_test_result_fail("detect stale values on v-regesters\n"); - dump(datap, vl * 4); - exit(-2); - } - } - - free(datap); - - ksft_test_result_pass("tests for v_initval_nolibc pass\n"); - ksft_exit_pass(); - return 0; -} diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c index 1f9969bed235..7b7d6f21acb4 100644 --- 
a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c +++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c @@ -6,13 +6,16 @@ int main(int argc, char **argv) { - int rc, pid, status, test_inherit = 0; + int rc, pid, status, test_inherit = 0, xtheadvector = 0; long ctrl, ctrl_c; char *exec_argv[2], *exec_envp[2]; - if (argc > 1) + if (argc > 1 && strcmp(argv[1], "x")) test_inherit = 1; + if (argc > 2 && strcmp(argv[2], "x")) + xtheadvector = 1; + ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL); if (ctrl < 0) { puts("PR_RISCV_V_GET_CONTROL is not supported\n"); @@ -53,11 +56,14 @@ int main(int argc, char **argv) puts("child's vstate_ctrl not equal to parent's\n"); exit(-1); } - asm volatile (".option push\n\t" - ".option arch, +v\n\t" - "vsetvli x0, x0, e32, m8, ta, ma\n\t" - ".option pop\n\t" - ); + if (xtheadvector) + asm volatile (".4byte 0x00007ed7"); + else + asm volatile (".option push\n\t" + ".option arch, +v\n\t" + "vsetvli x0, x0, e32, m8, ta, ma\n\t" + ".option pop\n\t" + ); exit(ctrl); } } diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index 40b3bffcbb40..62fbb17a0556 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -3,181 +3,244 @@ #include <unistd.h> #include <errno.h> #include <sys/wait.h> +#include <sys/types.h> +#include <stdlib.h> -#include "../hwprobe/hwprobe.h" -#include "../../kselftest.h" +#include "../../kselftest_harness.h" +#include "v_helpers.h" #define NEXT_PROGRAM "./vstate_exec_nolibc" -static int launch_test(int test_inherit) -{ - char *exec_argv[3], *exec_envp[1]; - int rc, pid, status; - - pid = fork(); - if (pid < 0) { - ksft_test_result_fail("fork failed %d", pid); - return -1; - } - if (!pid) { - exec_argv[0] = NEXT_PROGRAM; - exec_argv[1] = test_inherit != 0 ? 
"x" : NULL; - exec_argv[2] = NULL; - exec_envp[0] = NULL; - /* launch the program again to check inherit */ - rc = execve(NEXT_PROGRAM, exec_argv, exec_envp); - if (rc) { - perror("execve"); - ksft_test_result_fail("child execve failed %d\n", rc); - exit(-1); - } - } - - rc = waitpid(-1, &status, 0); - if (rc < 0) { - ksft_test_result_fail("waitpid failed\n"); - return -3; - } - - if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) || - WIFSIGNALED(status)) { - ksft_test_result_fail("child exited abnormally\n"); - return -4; - } - - return WEXITSTATUS(status); -} - -int test_and_compare_child(long provided, long expected, int inherit) +int test_and_compare_child(long provided, long expected, int inherit, int xtheadvector) { int rc; rc = prctl(PR_RISCV_V_SET_CONTROL, provided); if (rc != 0) { - ksft_test_result_fail("prctl with provided arg %lx failed with code %d\n", - provided, rc); + printf("prctl with provided arg %lx failed with code %d\n", + provided, rc); return -1; } - rc = launch_test(inherit); + rc = launch_test(NEXT_PROGRAM, inherit, xtheadvector); if (rc != expected) { - ksft_test_result_fail("Test failed, check %d != %ld\n", rc, - expected); + printf("Test failed, check %d != %ld\n", rc, expected); return -2; } return 0; } -#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0 -#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2 +#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0 +#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2 -int main(void) +TEST(get_control_no_v) { - struct riscv_hwprobe pair; - long flag, expected; long rc; - ksft_set_plan(1); + if (is_vector_supported() || is_xtheadvector_supported()) + SKIP(return, "Test expects vector to be not supported"); - pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; - rc = riscv_hwprobe(&pair, 1, 0, NULL, 0); - if (rc < 0) { - ksft_test_result_fail("hwprobe() failed with %ld\n", rc); - return -1; - } + rc = prctl(PR_RISCV_V_GET_CONTROL); + EXPECT_EQ(-1, rc) + TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X"); + EXPECT_EQ(EINVAL, errno) + TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X"); +} - if (pair.key != RISCV_HWPROBE_KEY_IMA_EXT_0) { - ksft_test_result_fail("hwprobe cannot probe RISCV_HWPROBE_KEY_IMA_EXT_0\n"); - return -2; - } +TEST(set_control_no_v) +{ + long rc; - if (!(pair.value & RISCV_HWPROBE_EXT_ZVE32X)) { - rc = prctl(PR_RISCV_V_GET_CONTROL); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without ZVE32X\n"); - return -3; - } - - rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("SET_CONTROL should fail on kernel/hw without ZVE32X\n"); - return -4; - } - - ksft_test_result_skip("Vector not supported\n"); - return 0; - } + if (is_vector_supported() || is_xtheadvector_supported()) + SKIP(return, "Test expects vector to be not supported"); + + rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); + EXPECT_EQ(-1, rc) + TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X"); + EXPECT_EQ(EINVAL, errno) + TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X"); +} + +TEST(vstate_on_current) +{ + long flag; + long rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); flag = PR_RISCV_V_VSTATE_CTRL_ON; rc = prctl(PR_RISCV_V_SET_CONTROL, flag); - if (rc != 0) { - ksft_test_result_fail("Enabling V for current should always success\n"); - return -5; - } + EXPECT_EQ(0, rc) TH_LOG("Enabling V for current should always succeed"); +} + 
+TEST(vstate_off_eperm) +{ + long flag; + long rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); flag = PR_RISCV_V_VSTATE_CTRL_OFF; rc = prctl(PR_RISCV_V_SET_CONTROL, flag); - if (rc != -1 || errno != EPERM) { - ksft_test_result_fail("Disabling current's V alive must fail with EPERM(%d)\n", - errno); - return -5; + EXPECT_EQ(EPERM, errno) + TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno); + EXPECT_EQ(-1, rc) + TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno); +} + +TEST(vstate_on_no_nesting) +{ + long flag; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); } /* Turn on next's vector explicitly and test */ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; - if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0)) - return -6; + + EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0, xtheadvector)); +} + +TEST(vstate_off_nesting) +{ + long flag; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn off next's vector explicitly and test */ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; - if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 0)) - return -7; + + EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 1, xtheadvector)); +} + +TEST(vstate_on_inherit_no_nesting) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + + /* Turn on next's vector explicitly and test no inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_ON; + + EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector)); +} + +TEST(vstate_on_inherit) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn on next's vector explicitly and test inherit */ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; expected = flag | PR_RISCV_V_VSTATE_CTRL_ON; - if (test_and_compare_child(flag, expected, 0)) - return -8; - if (test_and_compare_child(flag, expected, 1)) - return -9; + EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector)); +} + +TEST(vstate_off_inherit_no_nesting) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + /* Turn off next's vector explicitly and test no inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF; + + EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector)); +} + +TEST(vstate_off_inherit) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn off next's vector 
explicitly and test inherit */ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF; - if (test_and_compare_child(flag, expected, 0)) - return -10; - if (test_and_compare_child(flag, expected, 1)) - return -11; + EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector)); +} + +/* arguments should fail with EINVAL */ +TEST(inval_set_control_1) +{ + int rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); - /* arguments should fail with EINVAL */ rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +/* arguments should fail with EINVAL */ +TEST(inval_set_control_2) +{ + int rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} - rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } +/* arguments should fail with EINVAL */ +TEST(inval_set_control_3) +{ + int rc; - rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); - ksft_test_result_pass("tests for riscv_v_vstate_ctrl pass\n"); - ksft_exit_pass(); - return 0; + rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 0ea4f6813930..4d4a76532dc9 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -237,7 +237,7 @@ static uint64_t set_metadata(uint64_t src, unsigned long lam) * both pointers should point to the same address. * * @return: - * 0: value on the pointer with metadate and value on original are same + * 0: value on the pointer with metadata and value on original are same * 1: not same. */ static int handle_lam_test(void *src, unsigned int lam) diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 7058dc614c25..de25892f865f 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -96,41 +96,57 @@ void vsock_wait_remote_close(int fd) close(epollfd); } -/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */ -int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type) +/* Create socket <type>, bind to <cid, port> and return the file descriptor. 
*/ +int vsock_bind(unsigned int cid, unsigned int port, int type) { - struct sockaddr_vm sa_client = { - .svm_family = AF_VSOCK, - .svm_cid = VMADDR_CID_ANY, - .svm_port = bind_port, - }; - struct sockaddr_vm sa_server = { + struct sockaddr_vm sa = { .svm_family = AF_VSOCK, .svm_cid = cid, .svm_port = port, }; + int fd; - int client_fd, ret; - - client_fd = socket(AF_VSOCK, type, 0); - if (client_fd < 0) { + fd = socket(AF_VSOCK, type, 0); + if (fd < 0) { perror("socket"); exit(EXIT_FAILURE); } - if (bind(client_fd, (struct sockaddr *)&sa_client, sizeof(sa_client))) { + if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) { perror("bind"); exit(EXIT_FAILURE); } + return fd; +} + +int vsock_connect_fd(int fd, unsigned int cid, unsigned int port) +{ + struct sockaddr_vm sa = { + .svm_family = AF_VSOCK, + .svm_cid = cid, + .svm_port = port, + }; + int ret; + timeout_begin(TIMEOUT); do { - ret = connect(client_fd, (struct sockaddr *)&sa_server, sizeof(sa_server)); + ret = connect(fd, (struct sockaddr *)&sa, sizeof(sa)); timeout_check("connect"); } while (ret < 0 && errno == EINTR); timeout_end(); - if (ret < 0) { + return ret; +} + +/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */ +int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type) +{ + int client_fd; + + client_fd = vsock_bind(VMADDR_CID_ANY, bind_port, type); + + if (vsock_connect_fd(client_fd, cid, port)) { perror("connect"); exit(EXIT_FAILURE); } @@ -141,17 +157,6 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po /* Connect to <cid, port> and return the file descriptor. */ int vsock_connect(unsigned int cid, unsigned int port, int type) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = port, - .svm_cid = cid, - }, - }; - int ret; int fd; control_expectln("LISTENING"); @@ -162,20 +167,14 @@ int vsock_connect(unsigned int cid, unsigned int port, int type) exit(EXIT_FAILURE); } - timeout_begin(TIMEOUT); - do { - ret = connect(fd, &addr.sa, sizeof(addr.svm)); - timeout_check("connect"); - } while (ret < 0 && errno == EINTR); - timeout_end(); - - if (ret < 0) { + if (vsock_connect_fd(fd, cid, port)) { int old_errno = errno; close(fd); fd = -1; errno = old_errno; } + return fd; } @@ -192,28 +191,9 @@ int vsock_seqpacket_connect(unsigned int cid, unsigned int port) /* Listen on <cid, port> and return the file descriptor. 
*/ static int vsock_listen(unsigned int cid, unsigned int port, int type) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = port, - .svm_cid = cid, - }, - }; int fd; - fd = socket(AF_VSOCK, type, 0); - if (fd < 0) { - perror("socket"); - exit(EXIT_FAILURE); - } - - if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { - perror("bind"); - exit(EXIT_FAILURE); - } + fd = vsock_bind(cid, port, type); if (listen(fd, 1) < 0) { perror("listen"); diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index e62f46b2b92a..d1f765ce3eee 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -39,10 +39,12 @@ struct test_case { void init_signals(void); unsigned int parse_cid(const char *str); unsigned int parse_port(const char *str); +int vsock_connect_fd(int fd, unsigned int cid, unsigned int port); int vsock_connect(unsigned int cid, unsigned int port, int type); int vsock_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp, int type); int vsock_stream_connect(unsigned int cid, unsigned int port); +int vsock_bind(unsigned int cid, unsigned int port, int type); int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type); int vsock_seqpacket_connect(unsigned int cid, unsigned int port); diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 1eebbc0d5f61..dfff8b288265 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -113,24 +113,9 @@ static void test_stream_bind_only_client(const struct test_opts *opts) static void test_stream_bind_only_server(const struct test_opts *opts) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = opts->peer_port, - .svm_cid = VMADDR_CID_ANY, - }, - }; int fd; - fd = socket(AF_VSOCK, SOCK_STREAM, 0); - - if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { - perror("bind"); - exit(EXIT_FAILURE); - } + fd = vsock_bind(VMADDR_CID_ANY, opts->peer_port, SOCK_STREAM); /* Notify the client that the server is ready */ control_writeln("BIND"); @@ -1708,6 +1693,101 @@ static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) close(fd); } +#define MAX_PORT_RETRIES 24 /* net/vmw_vsock/af_vsock.c */ + +/* Test attempts to trigger a transport release for an unbound socket. This can + * lead to a reference count mishandling. + */ +static void test_stream_transport_uaf_client(const struct test_opts *opts) +{ + int sockets[MAX_PORT_RETRIES]; + struct sockaddr_vm addr; + int fd, i, alen; + + fd = vsock_bind(VMADDR_CID_ANY, VMADDR_PORT_ANY, SOCK_STREAM); + + alen = sizeof(addr); + if (getsockname(fd, (struct sockaddr *)&addr, &alen)) { + perror("getsockname"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < MAX_PORT_RETRIES; ++i) + sockets[i] = vsock_bind(VMADDR_CID_ANY, ++addr.svm_port, + SOCK_STREAM); + + close(fd); + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (!vsock_connect_fd(fd, addr.svm_cid, addr.svm_port)) { + perror("Unexpected connect() #1 success"); + exit(EXIT_FAILURE); + } + + /* Vulnerable system may crash now. 
*/ + if (!vsock_connect_fd(fd, VMADDR_CID_HOST, VMADDR_PORT_ANY)) { + perror("Unexpected connect() #2 success"); + exit(EXIT_FAILURE); + } + + close(fd); + while (i--) + close(sockets[i]); + + control_writeln("DONE"); +} + +static void test_stream_transport_uaf_server(const struct test_opts *opts) +{ + control_expectln("DONE"); +} + +static void test_stream_connect_retry_client(const struct test_opts *opts) +{ + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (!vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) { + fprintf(stderr, "Unexpected connect() #1 success\n"); + exit(EXIT_FAILURE); + } + + control_writeln("LISTEN"); + control_expectln("LISTENING"); + + if (vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) { + perror("connect() #2"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_connect_retry_server(const struct test_opts *opts) +{ + int fd; + + control_expectln("LISTEN"); + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1853,6 +1933,16 @@ static struct test_case test_cases[] = { .run_client = test_stream_msgzcopy_leak_zcskb_client, .run_server = test_stream_msgzcopy_leak_zcskb_server, }, + { + .name = "SOCK_STREAM transport release use-after-free", + .run_client = test_stream_transport_uaf_client, + .run_server = test_stream_transport_uaf_server, + }, + { + .name = "SOCK_STREAM retry failed connect()", + .run_client = test_stream_connect_retry_client, + .run_server = test_stream_connect_retry_server, + }, {}, }; |
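
Note (editor's illustration, not part of the patch): the vstate_prctl.c rewrite above moves the RISC-V vector prctl checks from the plan-based ksft_* API to the kselftest harness. As a minimal sketch of that idiom only — the test name, skip condition, and log messages below are illustrative, and the header paths/macros are assumed to be the standard ones from tools/testing/selftests/kselftest_harness.h — a harness-based selftest reduces to TEST() cases, SKIP() guards, EXPECT_*() checks with a trailing TH_LOG() that fires only on failure, and TEST_HARNESS_MAIN supplying main():

/*
 * Minimal sketch of the kselftest harness idiom used by the rewritten
 * vstate_prctl.c (illustration only, not part of the patch): each TEST()
 * is reported as an independent case, SKIP() marks a case skipped rather
 * than failed, EXPECT_*() records a failure and runs the trailing TH_LOG()
 * only when the check fails, and TEST_HARNESS_MAIN provides main().
 */
#include <errno.h>
#include <sys/prctl.h>	/* assumes PR_RISCV_V_GET_CONTROL is visible via the uapi prctl header */

#include "../../kselftest_harness.h"	/* relative path as used by the riscv selftests */

TEST(riscv_v_get_control_smoke)
{
	long rc = prctl(PR_RISCV_V_GET_CONTROL);

	/* Kernels or hardware without ZVE32X reject this prctl with EINVAL. */
	if (rc == -1 && errno == EINVAL)
		SKIP(return, "PR_RISCV_V_GET_CONTROL not supported here");

	/* Otherwise the prctl returns a non-negative vstate control word. */
	EXPECT_LE(0, rc)
		TH_LOG("unexpected control word %ld (errno %d)", rc, errno);
}

TEST_HARNESS_MAIN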