diff options
1101 files changed, 32501 insertions, 10889 deletions
diff --git a/.clang-format b/.clang-format index 196ca317bd1f..6ec5558b516b 100644 --- a/.clang-format +++ b/.clang-format @@ -86,6 +86,8 @@ ForEachMacros: - 'bio_for_each_segment_all' - 'bio_list_for_each' - 'bip_for_each_vec' + - 'bitmap_for_each_clear_region' + - 'bitmap_for_each_set_region' - 'blkg_for_each_descendant_post' - 'blkg_for_each_descendant_pre' - 'blk_queue_for_each_rl' @@ -115,6 +117,7 @@ ForEachMacros: - 'drm_client_for_each_connector_iter' - 'drm_client_for_each_modeset' - 'drm_connector_for_each_possible_encoder' + - 'drm_for_each_bridge_in_chain' - 'drm_for_each_connector_iter' - 'drm_for_each_crtc' - 'drm_for_each_encoder' @@ -136,9 +139,10 @@ ForEachMacros: - 'for_each_bio' - 'for_each_board_func_rsrc' - 'for_each_bvec' + - 'for_each_card_auxs' + - 'for_each_card_auxs_safe' - 'for_each_card_components' - - 'for_each_card_links' - - 'for_each_card_links_safe' + - 'for_each_card_pre_auxs' - 'for_each_card_prelinks' - 'for_each_card_rtds' - 'for_each_card_rtds_safe' @@ -166,6 +170,7 @@ ForEachMacros: - 'for_each_dpcm_fe' - 'for_each_drhd_unit' - 'for_each_dss_dev' + - 'for_each_efi_handle' - 'for_each_efi_memory_desc' - 'for_each_efi_memory_desc_in_map' - 'for_each_element' @@ -190,6 +195,7 @@ ForEachMacros: - 'for_each_lru' - 'for_each_matching_node' - 'for_each_matching_node_and_match' + - 'for_each_member' - 'for_each_memblock' - 'for_each_memblock_type' - 'for_each_memcg_cache_index' @@ -200,9 +206,11 @@ ForEachMacros: - 'for_each_msi_entry' - 'for_each_msi_entry_safe' - 'for_each_net' + - 'for_each_net_continue_reverse' - 'for_each_netdev' - 'for_each_netdev_continue' - 'for_each_netdev_continue_rcu' + - 'for_each_netdev_continue_reverse' - 'for_each_netdev_feature' - 'for_each_netdev_in_bond_rcu' - 'for_each_netdev_rcu' @@ -254,10 +262,10 @@ ForEachMacros: - 'for_each_reserved_mem_region' - 'for_each_rtd_codec_dai' - 'for_each_rtd_codec_dai_rollback' - - 'for_each_rtdcom' - - 'for_each_rtdcom_safe' + - 'for_each_rtd_components' - 'for_each_set_bit' - 'for_each_set_bit_from' + - 'for_each_set_clump8' - 'for_each_sg' - 'for_each_sg_dma_page' - 'for_each_sg_page' @@ -267,6 +275,7 @@ ForEachMacros: - 'for_each_subelement_id' - '__for_each_thread' - 'for_each_thread' + - 'for_each_wakeup_source' - 'for_each_zone' - 'for_each_zone_zonelist' - 'for_each_zone_zonelist_nodemask' @@ -330,6 +339,7 @@ ForEachMacros: - 'list_for_each' - 'list_for_each_codec' - 'list_for_each_codec_safe' + - 'list_for_each_continue' - 'list_for_each_entry' - 'list_for_each_entry_continue' - 'list_for_each_entry_continue_rcu' @@ -351,6 +361,7 @@ ForEachMacros: - 'llist_for_each_entry' - 'llist_for_each_entry_safe' - 'llist_for_each_safe' + - 'mci_for_each_dimm' - 'media_device_for_each_entity' - 'media_device_for_each_intf' - 'media_device_for_each_link' @@ -444,10 +455,16 @@ ForEachMacros: - 'virtio_device_for_each_vq' - 'xa_for_each' - 'xa_for_each_marked' + - 'xa_for_each_range' - 'xa_for_each_start' - 'xas_for_each' - 'xas_for_each_conflict' - 'xas_for_each_marked' + - 'xbc_array_for_each_value' + - 'xbc_for_each_key_value' + - 'xbc_node_for_each_array_value' + - 'xbc_node_for_each_child' + - 'xbc_node_for_each_key_value' - 'zorro_for_each_dev' #IncludeBlocks: Preserve # Unknown to clang-format-5.0 diff --git a/.gitignore b/.gitignore index 2763fce8766c..72ef86a5570d 100644 --- a/.gitignore +++ b/.gitignore @@ -100,10 +100,6 @@ modules.order /include/ksym/ /arch/*/include/generated/ -# Generated lkdtm tests -/tools/testing/selftests/lkdtm/*.sh -!/tools/testing/selftests/lkdtm/run.sh - # stgit generated dirs patches-* diff --git a/Documentation/admin-guide/acpi/fan_performance_states.rst b/Documentation/admin-guide/acpi/fan_performance_states.rst index 21d233ca50d8..98fe5c333121 100644 --- a/Documentation/admin-guide/acpi/fan_performance_states.rst +++ b/Documentation/admin-guide/acpi/fan_performance_states.rst @@ -18,7 +18,7 @@ may look as follows:: $ ls -l /sys/bus/acpi/devices/INT3404:00/ total 0 -... + ... -r--r--r-- 1 root root 4096 Dec 13 20:38 state0 -r--r--r-- 1 root root 4096 Dec 13 20:38 state1 -r--r--r-- 1 root root 4096 Dec 13 20:38 state10 @@ -38,7 +38,7 @@ where each of the "state*" files represents one performance state of the fan and contains a colon-separated list of 5 integer numbers (fields) with the following interpretation:: -control_percent:trip_point_index:speed_rpm:noise_level_mdb:power_mw + control_percent:trip_point_index:speed_rpm:noise_level_mdb:power_mw * ``control_percent``: The percent value to be used to set the fan speed to a specific level using the _FSL object (0-100). diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dbc22d684627..c07815d230bc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -136,6 +136,10 @@ dynamic table installation which will install SSDT tables to /sys/firmware/acpi/tables/dynamic. + acpi_no_watchdog [HW,ACPI,WDT] + Ignore the ACPI-based watchdog interface (WDAT) and let + a native driver control the watchdog device instead. + acpi_rsdp= [ACPI,EFI,KEXEC] Pass the RSDP address to the kernel, mostly used on machines running EFI runtime service to boot the diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 287b98708a40..e043c9213388 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -67,7 +67,8 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - sparc64 - mips64 - s390x - - riscv + - riscv64 + - riscv32 And the older cBPF JIT supported on the following archs: diff --git a/Documentation/devicetree/bindings/arm/arm,scmi.txt b/Documentation/devicetree/bindings/arm/arm,scmi.txt index f493d69e6194..dc102c4e4a78 100644 --- a/Documentation/devicetree/bindings/arm/arm,scmi.txt +++ b/Documentation/devicetree/bindings/arm/arm,scmi.txt @@ -102,7 +102,7 @@ Required sub-node properties: [1] Documentation/devicetree/bindings/clock/clock-bindings.txt [2] Documentation/devicetree/bindings/power/power-domain.yaml [3] Documentation/devicetree/bindings/thermal/thermal.txt -[4] Documentation/devicetree/bindings/sram/sram.txt +[4] Documentation/devicetree/bindings/sram/sram.yaml [5] Documentation/devicetree/bindings/reset/reset.txt Example: diff --git a/Documentation/devicetree/bindings/arm/arm,scpi.txt b/Documentation/devicetree/bindings/arm/arm,scpi.txt index 7b83ef43b418..dd04d9d9a1b8 100644 --- a/Documentation/devicetree/bindings/arm/arm,scpi.txt +++ b/Documentation/devicetree/bindings/arm/arm,scpi.txt @@ -109,7 +109,7 @@ Required properties: [0] http://infocenter.arm.com/help/topic/com.arm.doc.dui0922b/index.html [1] Documentation/devicetree/bindings/clock/clock-bindings.txt [2] Documentation/devicetree/bindings/thermal/thermal.txt -[3] Documentation/devicetree/bindings/sram/sram.txt +[3] Documentation/devicetree/bindings/sram/sram.yaml [4] Documentation/devicetree/bindings/power/power-domain.yaml Example: diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt index b82b6a0ae6f7..8c7a4908a849 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt @@ -62,7 +62,7 @@ Timer node: Syscon reboot node: -See Documentation/devicetree/bindings/power/reset/syscon-reboot.txt for the +See Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml for the detailed list of properties, the two values defined below are specific to the BCM6328-style timer: diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index 7a9c3ce2dbef..0d5b61056b10 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -216,7 +216,7 @@ properties: $ref: '/schemas/types.yaml#/definitions/phandle-array' description: | List of phandles to idle state nodes supported - by this cpu (see ./idle-states.txt). + by this cpu (see ./idle-states.yaml). capacity-dmips-mhz: $ref: '/schemas/types.yaml#/definitions/uint32' diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml index a8e0b4a813ed..0e17e1f6fb80 100644 --- a/Documentation/devicetree/bindings/arm/fsl.yaml +++ b/Documentation/devicetree/bindings/arm/fsl.yaml @@ -160,7 +160,7 @@ properties: items: - enum: - armadeus,imx6dl-apf6 # APF6 (Solo) SoM - - armadeus,imx6dl-apf6dldev # APF6 (Solo) SoM on APF6Dev board + - armadeus,imx6dl-apf6dev # APF6 (Solo) SoM on APF6Dev board - eckelmann,imx6dl-ci4x10 - emtrion,emcon-mx6 # emCON-MX6S or emCON-MX6DL SoM - emtrion,emcon-mx6-avari # emCON-MX6S or emCON-MX6DL SoM on Avari Base diff --git a/Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt b/Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt index 115c5be0bd0b..8defacc44dd5 100644 --- a/Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt +++ b/Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt @@ -1,7 +1,7 @@ * Hisilicon Hi3519 System Controller Block This bindings use the following binding: -Documentation/devicetree/bindings/mfd/syscon.txt +Documentation/devicetree/bindings/mfd/syscon.yaml Required properties: - compatible: "hisilicon,hi3519-sysctrl". diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt b/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt index 06df04cc827a..6ce0b212ec6d 100644 --- a/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt +++ b/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt @@ -81,4 +81,4 @@ Example: }; }; -[1]. Documentation/devicetree/bindings/arm/idle-states.txt +[1]. Documentation/devicetree/bindings/arm/idle-states.yaml diff --git a/Documentation/devicetree/bindings/arm/omap/mpu.txt b/Documentation/devicetree/bindings/arm/omap/mpu.txt index f301e636fd52..e41490e6979c 100644 --- a/Documentation/devicetree/bindings/arm/omap/mpu.txt +++ b/Documentation/devicetree/bindings/arm/omap/mpu.txt @@ -17,7 +17,7 @@ am335x and am437x only: - pm-sram: Phandles to ocmcram nodes to be used for power management. First should be type 'protect-exec' for the driver to use to copy and run PM functions, second should be regular pool to be used for - data region for code. See Documentation/devicetree/bindings/sram/sram.txt + data region for code. See Documentation/devicetree/bindings/sram/sram.yaml for more details. Examples: diff --git a/Documentation/devicetree/bindings/arm/psci.yaml b/Documentation/devicetree/bindings/arm/psci.yaml index 8ef85420b2ab..5e66934455bb 100644 --- a/Documentation/devicetree/bindings/arm/psci.yaml +++ b/Documentation/devicetree/bindings/arm/psci.yaml @@ -100,13 +100,14 @@ properties: bindings in [1]) must specify this property. [1] Kernel documentation - ARM idle states bindings - Documentation/devicetree/bindings/arm/idle-states.txt - - "#power-domain-cells": - description: - The number of cells in a PM domain specifier as per binding in [3]. - Must be 0 as to represent a single PM domain. + Documentation/devicetree/bindings/arm/idle-states.yaml +patternProperties: + "^power-domain-": + allOf: + - $ref: "../power/power-domain.yaml#" + type: object + description: | ARM systems can have multiple cores, sometimes in an hierarchical arrangement. This often, but not always, maps directly to the processor power topology of the system. Individual nodes in a topology have their @@ -122,14 +123,8 @@ properties: helps to implement support for OSI mode and OS implementations may choose to mandate it. - [3] Documentation/devicetree/bindings/power/power_domain.txt - [4] Documentation/devicetree/bindings/power/domain-idle-state.txt - - power-domains: - $ref: '/schemas/types.yaml#/definitions/phandle-array' - description: - List of phandles and PM domain specifiers, as defined by bindings of the - PM domain provider. + [3] Documentation/devicetree/bindings/power/power-domain.yaml + [4] Documentation/devicetree/bindings/power/domain-idle-state.yaml required: - compatible @@ -199,7 +194,7 @@ examples: CPU0: cpu@0 { device_type = "cpu"; - compatible = "arm,cortex-a53", "arm,armv8"; + compatible = "arm,cortex-a53"; reg = <0x0>; enable-method = "psci"; power-domains = <&CPU_PD0>; @@ -208,7 +203,7 @@ examples: CPU1: cpu@1 { device_type = "cpu"; - compatible = "arm,cortex-a57", "arm,armv8"; + compatible = "arm,cortex-a53"; reg = <0x100>; enable-method = "psci"; power-domains = <&CPU_PD1>; @@ -224,6 +219,9 @@ examples: exit-latency-us = <10>; min-residency-us = <100>; }; + }; + + domain-idle-states { CLUSTER_RET: cluster-retention { compatible = "domain-idle-state"; @@ -247,19 +245,19 @@ examples: compatible = "arm,psci-1.0"; method = "smc"; - CPU_PD0: cpu-pd0 { + CPU_PD0: power-domain-cpu0 { #power-domain-cells = <0>; domain-idle-states = <&CPU_PWRDN>; power-domains = <&CLUSTER_PD>; }; - CPU_PD1: cpu-pd1 { + CPU_PD1: power-domain-cpu1 { #power-domain-cells = <0>; domain-idle-states = <&CPU_PWRDN>; power-domains = <&CLUSTER_PD>; }; - CLUSTER_PD: cluster-pd { + CLUSTER_PD: power-domain-cluster { #power-domain-cells = <0>; domain-idle-states = <&CLUSTER_RET>, <&CLUSTER_PWRDN>; }; diff --git a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml index 68917bb7c7e8..55f7938c4826 100644 --- a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml +++ b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml @@ -52,7 +52,7 @@ required: examples: - | - mlahb: ahb { + mlahb: ahb@38000000 { compatible = "st,mlahb", "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml index 9fe11ceecdba..80973619342d 100644 --- a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml +++ b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml @@ -70,7 +70,6 @@ examples: #size-cells = <0>; pmic@3e3 { - compatible = "..."; reg = <0x3e3>; /* ... */ diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml index 69cfa4a3d562..c604822cda07 100644 --- a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml +++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml @@ -40,7 +40,7 @@ additionalProperties: false examples: - | - osc24M: clk@01c20050 { + osc24M: clk@1c20050 { #clock-cells = <0>; compatible = "allwinner,sun4i-a10-osc-clk"; reg = <0x01c20050 0x4>; diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml index 07f38def7dc3..43963c3062c8 100644 --- a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml +++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml @@ -41,7 +41,7 @@ additionalProperties: false examples: - | - clk@0600005c { + clk@600005c { #clock-cells = <0>; compatible = "allwinner,sun9i-a80-gt-clk"; reg = <0x0600005c 0x4>; diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml index 17f87178f6b8..3647007f82ca 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml @@ -42,7 +42,7 @@ properties: be part of GCC and hence the TSENS properties can also be part of the GCC/clock-controller node. For more details on the TSENS properties please refer - Documentation/devicetree/bindings/thermal/qcom-tsens.txt + Documentation/devicetree/bindings/thermal/qcom-tsens.yaml nvmem-cell-names: minItems: 1 diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml index 5d5d39665119..6009324be967 100644 --- a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml +++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml @@ -49,11 +49,7 @@ examples: resets = <&tcon_ch0_clk 0>; port { - #address-cells = <1>; - #size-cells = <0>; - - tve0_in_tcon0: endpoint@0 { - reg = <0>; + tve0_in_tcon0: endpoint { remote-endpoint = <&tcon0_out_tve0>; }; }; diff --git a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml index 6d72b3d11fbc..c21103869923 100644 --- a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml +++ b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml @@ -79,21 +79,15 @@ examples: #size-cells = <0>; anx6345_in: port@0 { - #address-cells = <1>; - #size-cells = <0>; reg = <0>; - anx6345_in_tcon0: endpoint@0 { - reg = <0>; + anx6345_in_tcon0: endpoint { remote-endpoint = <&tcon0_out_anx6345>; }; }; anx6345_out: port@1 { - #address-cells = <1>; - #size-cells = <0>; reg = <1>; - anx6345_out_panel: endpoint@0 { - reg = <0>; + anx6345_out_panel: endpoint { remote-endpoint = <&panel_in_edp>; }; }; diff --git a/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml b/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml index 4ebcea7d0c63..a614644c9849 100644 --- a/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml +++ b/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml @@ -37,6 +37,8 @@ examples: dsi@ff450000 { #address-cells = <1>; #size-cells = <0>; + reg = <0xff450000 0x1000>; + panel@0 { compatible = "leadtek,ltk500hd1829"; reg = <0>; diff --git a/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml b/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml index 186e5e1c8fa3..22c91beb0541 100644 --- a/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml +++ b/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml @@ -37,6 +37,8 @@ examples: dsi@ff450000 { #address-cells = <1>; #size-cells = <0>; + reg = <0xff450000 0x1000>; + panel@0 { compatible = "xinpeng,xpp055c272"; reg = <0>; diff --git a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml index 678776b6012a..1db608c9eef5 100644 --- a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml +++ b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml @@ -174,10 +174,6 @@ examples: }; }; - soc@1c00000 { - lcdc0: lcdc@1c0c000 { - compatible = "allwinner,sun4i-a10-lcdc"; - }; - }; + lcdc0: lcdc { }; ... diff --git a/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt b/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt index 7bf1bb444812..aac617acb64f 100644 --- a/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt +++ b/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt @@ -37,7 +37,7 @@ Optional nodes: supports a single port with a single endpoint. - See also Documentation/devicetree/bindings/display/tilcdc/panel.txt and - Documentation/devicetree/bindings/display/tilcdc/tfp410.txt for connecting + Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt for connecting tfp410 DVI encoder or lcd panel to lcdc [1] There is an errata about AM335x color wiring. For 16-bit color mode diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml index 8b5c346f23f6..34780d7535b8 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml @@ -143,7 +143,7 @@ examples: #size-cells = <2>; dma-coherent; dma-ranges; - ranges; + ranges = <0x0 0x30800000 0x0 0x30800000 0x0 0x05000000>; ti,sci-dev-id = <118>; @@ -169,16 +169,4 @@ examples: ti,sci-rm-range-rflow = <0x6>; /* GP RFLOW */ }; }; - - mcasp0: mcasp@02B00000 { - dmas = <&main_udmap 0xc400>, <&main_udmap 0x4400>; - dma-names = "tx", "rx"; - }; - - crypto: crypto@4E00000 { - compatible = "ti,sa2ul-crypto"; - - dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>, <&main_udmap 0x4001>; - dma-names = "tx", "rx1", "rx2"; - }; }; diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml index 4ea6a8789699..e8b99adcb1bd 100644 --- a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml +++ b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml @@ -84,31 +84,31 @@ examples: gpu_opp_table: opp_table0 { compatible = "operating-points-v2"; - opp@533000000 { + opp-533000000 { opp-hz = /bits/ 64 <533000000>; opp-microvolt = <1250000>; }; - opp@450000000 { + opp-450000000 { opp-hz = /bits/ 64 <450000000>; opp-microvolt = <1150000>; }; - opp@400000000 { + opp-400000000 { opp-hz = /bits/ 64 <400000000>; opp-microvolt = <1125000>; }; - opp@350000000 { + opp-350000000 { opp-hz = /bits/ 64 <350000000>; opp-microvolt = <1075000>; }; - opp@266000000 { + opp-266000000 { opp-hz = /bits/ 64 <266000000>; opp-microvolt = <1025000>; }; - opp@160000000 { + opp-160000000 { opp-hz = /bits/ 64 <160000000>; opp-microvolt = <925000>; }; - opp@100000000 { + opp-100000000 { opp-hz = /bits/ 64 <100000000>; opp-microvolt = <912500>; }; diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml index 36f59b3ade71..8d966f3ff3db 100644 --- a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml +++ b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml @@ -138,31 +138,31 @@ examples: gpu_opp_table: opp_table0 { compatible = "operating-points-v2"; - opp@533000000 { + opp-533000000 { opp-hz = /bits/ 64 <533000000>; opp-microvolt = <1250000>; }; - opp@450000000 { + opp-450000000 { opp-hz = /bits/ 64 <450000000>; opp-microvolt = <1150000>; }; - opp@400000000 { + opp-400000000 { opp-hz = /bits/ 64 <400000000>; opp-microvolt = <1125000>; }; - opp@350000000 { + opp-350000000 { opp-hz = /bits/ 64 <350000000>; opp-microvolt = <1075000>; }; - opp@266000000 { + opp-266000000 { opp-hz = /bits/ 64 <266000000>; opp-microvolt = <1025000>; }; - opp@160000000 { + opp-160000000 { opp-hz = /bits/ 64 <160000000>; opp-microvolt = <925000>; }; - opp@100000000 { + opp-100000000 { opp-hz = /bits/ 64 <100000000>; opp-microvolt = <912500>; }; diff --git a/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml b/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml index f46de17c0878..cc3c8ea6a894 100644 --- a/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml @@ -123,7 +123,7 @@ examples: samsung,syscon-phandle = <&pmu_system_controller>; /* NTC thermistor is a hwmon device */ - ncp15wb473@0 { + ncp15wb473 { compatible = "murata,ncp15wb473"; pullup-uv = <1800000>; pullup-ohm = <47000>; diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml index d7c3262b2494..c99ed3934d7e 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml @@ -62,7 +62,7 @@ required: examples: - | - i2c@00000000 { + i2c { #address-cells = <1>; #size-cells = <0>; gt928@5d { diff --git a/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt b/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt index c864a46cddcf..f5021214edec 100644 --- a/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt +++ b/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt @@ -1,7 +1,7 @@ Texas Instruments TWL family (twl4030) pwrbutton module This module is part of the TWL4030. For more details about the whole -chip see Documentation/devicetree/bindings/mfd/twl-familly.txt. +chip see Documentation/devicetree/bindings/mfd/twl-family.txt. This module provides a simple power button event via an Interrupt. diff --git a/Documentation/devicetree/bindings/leds/common.yaml b/Documentation/devicetree/bindings/leds/common.yaml index d97d099b87e5..c60b994fe116 100644 --- a/Documentation/devicetree/bindings/leds/common.yaml +++ b/Documentation/devicetree/bindings/leds/common.yaml @@ -85,7 +85,7 @@ properties: # LED will act as a back-light, controlled by the framebuffer system - backlight # LED will turn on (but for leds-gpio see "default-state" property in - # Documentation/devicetree/bindings/leds/leds-gpio.txt) + # Documentation/devicetree/bindings/leds/leds-gpio.yaml) - default-on # LED "double" flashes at a load average based rate - heartbeat diff --git a/Documentation/devicetree/bindings/leds/register-bit-led.txt b/Documentation/devicetree/bindings/leds/register-bit-led.txt index cf1ea403ba7a..c7af6f70a97b 100644 --- a/Documentation/devicetree/bindings/leds/register-bit-led.txt +++ b/Documentation/devicetree/bindings/leds/register-bit-led.txt @@ -5,7 +5,7 @@ where single bits in a certain register can turn on/off a single LED. The register bit LEDs appear as children to the syscon device, with the proper compatible string. For the syscon bindings see: -Documentation/devicetree/bindings/mfd/syscon.txt +Documentation/devicetree/bindings/mfd/syscon.yaml Each LED is represented as a sub-node of the syscon device. Each node's name represents the name of the corresponding LED. diff --git a/Documentation/devicetree/bindings/media/ti,cal.yaml b/Documentation/devicetree/bindings/media/ti,cal.yaml index 1ea784179536..5e066629287d 100644 --- a/Documentation/devicetree/bindings/media/ti,cal.yaml +++ b/Documentation/devicetree/bindings/media/ti,cal.yaml @@ -177,7 +177,7 @@ examples: }; }; - i2c5: i2c@4807c000 { + i2c { clock-frequency = <400000>; #address-cells = <1>; #size-cells = <0>; diff --git a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt index 44d71469c914..63f674ffeb4f 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt +++ b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt @@ -32,7 +32,7 @@ Required only for "ti,emif-am3352" and "ti,emif-am4372": - sram : Phandles for generic sram driver nodes, first should be type 'protect-exec' for the driver to use to copy and run PM functions, second should be regular pool to be used for - data region for code. See Documentation/devicetree/bindings/sram/sram.txt + data region for code. See Documentation/devicetree/bindings/sram/sram.yaml for more details. Optional properties: diff --git a/Documentation/devicetree/bindings/mfd/max77650.yaml b/Documentation/devicetree/bindings/mfd/max77650.yaml index 4a70f875a6eb..480385789394 100644 --- a/Documentation/devicetree/bindings/mfd/max77650.yaml +++ b/Documentation/devicetree/bindings/mfd/max77650.yaml @@ -97,14 +97,14 @@ examples: regulators { compatible = "maxim,max77650-regulator"; - max77650_ldo: regulator@0 { + max77650_ldo: regulator-ldo { regulator-compatible = "ldo"; regulator-name = "max77650-ldo"; regulator-min-microvolt = <1350000>; regulator-max-microvolt = <2937500>; }; - max77650_sbb0: regulator@1 { + max77650_sbb0: regulator-sbb0 { regulator-compatible = "sbb0"; regulator-name = "max77650-sbb0"; regulator-min-microvolt = <800000>; diff --git a/Documentation/devicetree/bindings/mfd/tps65910.txt b/Documentation/devicetree/bindings/mfd/tps65910.txt index 4f62143afd24..a5ced46bbde9 100644 --- a/Documentation/devicetree/bindings/mfd/tps65910.txt +++ b/Documentation/devicetree/bindings/mfd/tps65910.txt @@ -26,8 +26,8 @@ Required properties: ldo6, ldo7, ldo8 - xxx-supply: Input voltage supply regulator. - These entries are require if regulators are enabled for a device. Missing of these - properties can cause the regulator registration fails. + These entries are required if regulators are enabled for a device. Missing these + properties can cause the regulator registration to fail. If some of input supply is powered through battery or always-on supply then also it is require to have these parameters with proper node handle of always on power supply. diff --git a/Documentation/devicetree/bindings/mfd/twl-familly.txt b/Documentation/devicetree/bindings/mfd/twl-family.txt index 56f244b5d8a4..56f244b5d8a4 100644 --- a/Documentation/devicetree/bindings/mfd/twl-familly.txt +++ b/Documentation/devicetree/bindings/mfd/twl-family.txt diff --git a/Documentation/devicetree/bindings/mfd/zii,rave-sp.txt b/Documentation/devicetree/bindings/mfd/zii,rave-sp.txt index 088eff9ddb78..e0f901edc063 100644 --- a/Documentation/devicetree/bindings/mfd/zii,rave-sp.txt +++ b/Documentation/devicetree/bindings/mfd/zii,rave-sp.txt @@ -20,7 +20,7 @@ RAVE SP consists of the following sub-devices: Device Description ------ ----------- rave-sp-wdt : Watchdog -rave-sp-nvmem : Interface to onborad EEPROM +rave-sp-nvmem : Interface to onboard EEPROM rave-sp-backlight : Display backlight rave-sp-hwmon : Interface to onboard hardware sensors rave-sp-leds : Interface to onboard LEDs diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt index bb7e896cb644..9134e9bcca56 100644 --- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt +++ b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt @@ -26,7 +26,7 @@ For generic IOMMU bindings, see Documentation/devicetree/bindings/iommu/iommu.txt. For arm-smmu binding, see: -Documentation/devicetree/bindings/iommu/arm,smmu.txt. +Documentation/devicetree/bindings/iommu/arm,smmu.yaml. Required properties: diff --git a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml index 3c0df4016a12..8fded83c519a 100644 --- a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml +++ b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml @@ -370,6 +370,7 @@ examples: mmc3: mmc@1c12000 { #address-cells = <1>; #size-cells = <0>; + reg = <0x1c12000 0x200>; pinctrl-names = "default"; pinctrl-0 = <&mmc3_pins_a>; vmmc-supply = <®_vmmc3>; diff --git a/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt b/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt index f3893c4d3c6a..d2eada5044b2 100644 --- a/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt +++ b/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt @@ -27,7 +27,7 @@ Required properties of NAND chips: - reg: shall contain the native Chip Select ids from 0 to max supported by the cadence nand flash controller -See Documentation/devicetree/bindings/mtd/nand.txt for more details on +See Documentation/devicetree/bindings/mtd/nand-controller.yaml for more details on generic bindings. Example: diff --git a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt index 48a7f916c5e4..88b57b0ca1f4 100644 --- a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt +++ b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt @@ -45,7 +45,7 @@ Optional properties: switch queue - resets: a single phandle and reset identifier pair. See - Documentation/devicetree/binding/reset/reset.txt for details. + Documentation/devicetree/bindings/reset/reset.txt for details. - reset-names: If the "reset" property is specified, this property should have the value "switch" to denote the switch reset line. diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt index 250f8d8cdce4..c00fb0d22c7b 100644 --- a/Documentation/devicetree/bindings/net/fsl-fman.txt +++ b/Documentation/devicetree/bindings/net/fsl-fman.txt @@ -110,6 +110,13 @@ PROPERTIES Usage: required Definition: See soc/fsl/qman.txt and soc/fsl/bman.txt +- fsl,erratum-a050385 + Usage: optional + Value type: boolean + Definition: A boolean property. Indicates the presence of the + erratum A050385 which indicates that DMA transactions that are + split can result in a FMan lock. + ============================================================================= FMan MURAM Node diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt index 999aceadb985..beca6466d59a 100644 --- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt +++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt @@ -31,6 +31,7 @@ Optional properties for compatible string qcom,wcn399x-bt: - max-speed: see Documentation/devicetree/bindings/serial/slave-device.txt - firmware-name: specify the name of nvm firmware to load + - clocks: clock provided to the controller Examples: @@ -57,5 +58,6 @@ serial@898000 { vddch0-supply = <&vreg_l25a_3p3>; max-speed = <3200000>; firmware-name = "crnv21.bin"; + clocks = <&rpmhcc RPMH_RF_CLK2>; }; }; diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.yaml b/Documentation/devicetree/bindings/nvmem/nvmem.yaml index b43c6c65294e..65980224d550 100644 --- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml +++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml @@ -76,6 +76,8 @@ examples: qfprom: eeprom@700000 { #address-cells = <1>; #size-cells = <1>; + reg = <0x00700000 0x100000>; + wp-gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>; /* ... */ diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml index 020ef9e4c411..94ac23687b7e 100644 --- a/Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml @@ -86,7 +86,7 @@ examples: #include <dt-bindings/clock/sun4i-a10-ccu.h> #include <dt-bindings/reset/sun4i-a10-ccu.h> - usbphy: phy@01c13400 { + usbphy: phy@1c13400 { #phy-cells = <1>; compatible = "allwinner,sun4i-a10-usb-phy"; reg = <0x01c13400 0x10>, <0x01c14800 0x4>, <0x01c1c800 0x4>; diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml index bb690e20c368..135c7dfbc180 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml @@ -17,7 +17,7 @@ description: |+ "aspeed,ast2400-scu", "syscon", "simple-mfd" Refer to the the bindings described in - Documentation/devicetree/bindings/mfd/syscon.txt + Documentation/devicetree/bindings/mfd/syscon.yaml properties: compatible: diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml index f7f5d57f2c9a..824f7fd1d51b 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml @@ -18,7 +18,7 @@ description: |+ "aspeed,g5-scu", "syscon", "simple-mfd" Refer to the the bindings described in - Documentation/devicetree/bindings/mfd/syscon.txt + Documentation/devicetree/bindings/mfd/syscon.yaml properties: compatible: diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml index 3749fa233e87..ac8d1c30a8ed 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml @@ -17,7 +17,7 @@ description: |+ "aspeed,ast2600-scu", "syscon", "simple-mfd" Refer to the the bindings described in - Documentation/devicetree/bindings/mfd/syscon.txt + Documentation/devicetree/bindings/mfd/syscon.yaml properties: compatible: diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml index 754ea7ab040a..ef4de32cb17c 100644 --- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml @@ -248,7 +248,7 @@ examples: }; //Example 3 pin groups - pinctrl@60020000 { + pinctrl { usart1_pins_a: usart1-0 { pins1 { pinmux = <STM32_PINMUX('A', 9, AF7)>; diff --git a/Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml b/Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml index aab70e8b681e..d3098c924b25 100644 --- a/Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml +++ b/Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml @@ -18,7 +18,7 @@ description: |+ "amlogic,meson-gx-hhi-sysctrl", "simple-mfd", "syscon" Refer to the the bindings described in - Documentation/devicetree/bindings/mfd/syscon.txt + Documentation/devicetree/bindings/mfd/syscon.yaml properties: compatible: diff --git a/Documentation/devicetree/bindings/power/domain-idle-state.txt b/Documentation/devicetree/bindings/power/domain-idle-state.txt deleted file mode 100644 index eefc7ed22ca2..000000000000 --- a/Documentation/devicetree/bindings/power/domain-idle-state.txt +++ /dev/null @@ -1,33 +0,0 @@ -PM Domain Idle State Node: - -A domain idle state node represents the state parameters that will be used to -select the state when there are no active components in the domain. - -The state node has the following parameters - - -- compatible: - Usage: Required - Value type: <string> - Definition: Must be "domain-idle-state". - -- entry-latency-us - Usage: Required - Value type: <prop-encoded-array> - Definition: u32 value representing worst case latency in - microseconds required to enter the idle state. - The exit-latency-us duration may be guaranteed - only after entry-latency-us has passed. - -- exit-latency-us - Usage: Required - Value type: <prop-encoded-array> - Definition: u32 value representing worst case latency - in microseconds required to exit the idle state. - -- min-residency-us - Usage: Required - Value type: <prop-encoded-array> - Definition: u32 value representing minimum residency duration - in microseconds after which the idle state will yield - power benefits after overcoming the overhead in entering -i the idle state. diff --git a/Documentation/devicetree/bindings/power/domain-idle-state.yaml b/Documentation/devicetree/bindings/power/domain-idle-state.yaml new file mode 100644 index 000000000000..dfba1af9abe5 --- /dev/null +++ b/Documentation/devicetree/bindings/power/domain-idle-state.yaml @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/power/domain-idle-state.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: PM Domain Idle States binding description + +maintainers: + - Ulf Hansson <ulf.hansson@linaro.org> + +description: + A domain idle state node represents the state parameters that will be used to + select the state when there are no active components in the PM domain. + +properties: + $nodename: + const: domain-idle-states + +patternProperties: + "^(cpu|cluster|domain)-": + type: object + description: + Each state node represents a domain idle state description. + + properties: + compatible: + const: domain-idle-state + + entry-latency-us: + description: + The worst case latency in microseconds required to enter the idle + state. Note that, the exit-latency-us duration may be guaranteed only + after the entry-latency-us has passed. + + exit-latency-us: + description: + The worst case latency in microseconds required to exit the idle + state. + + min-residency-us: + description: + The minimum residency duration in microseconds after which the idle + state will yield power benefits, after overcoming the overhead while + entering the idle state. + + required: + - compatible + - entry-latency-us + - exit-latency-us + - min-residency-us + +examples: + - | + + domain-idle-states { + domain_retention: domain-retention { + compatible = "domain-idle-state"; + entry-latency-us = <20>; + exit-latency-us = <40>; + min-residency-us = <80>; + }; + }; +... diff --git a/Documentation/devicetree/bindings/power/power-domain.yaml b/Documentation/devicetree/bindings/power/power-domain.yaml index 455b573293ae..6047aacd7766 100644 --- a/Documentation/devicetree/bindings/power/power-domain.yaml +++ b/Documentation/devicetree/bindings/power/power-domain.yaml @@ -25,22 +25,20 @@ description: |+ properties: $nodename: - pattern: "^(power-controller|power-domain)(@.*)?$" + pattern: "^(power-controller|power-domain)([@-].*)?$" domain-idle-states: $ref: /schemas/types.yaml#/definitions/phandle-array - description: - A phandle of an idle-state that shall be soaked into a generic domain - power state. The idle state definitions are compatible with - domain-idle-state specified in - Documentation/devicetree/bindings/power/domain-idle-state.txt - phandles that are not compatible with domain-idle-state will be ignored. - The domain-idle-state property reflects the idle state of this PM domain - and not the idle states of the devices or sub-domains in the PM domain. - Devices and sub-domains have their own idle-states independent - of the parent domain's idle states. In the absence of this property, - the domain would be considered as capable of being powered-on - or powered-off. + description: | + Phandles of idle states that defines the available states for the + power-domain provider. The idle state definitions are compatible with the + domain-idle-state bindings, specified in ./domain-idle-state.yaml. + + Note that, the domain-idle-state property reflects the idle states of this + PM domain and not the idle states of the devices or sub-domains in the PM + domain. Devices and sub-domains have their own idle states independent of + the parent domain's idle states. In the absence of this property, the + domain would be considered as capable of being powered-on or powered-off. operating-points-v2: $ref: /schemas/types.yaml#/definitions/phandle-array diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt index 5b09b2deb483..08497ef26c7a 100644 --- a/Documentation/devicetree/bindings/power/power_domain.txt +++ b/Documentation/devicetree/bindings/power/power_domain.txt @@ -109,4 +109,4 @@ Example: required-opps = <&domain1_opp_1>; }; -[1]. Documentation/devicetree/bindings/power/domain-idle-state.txt +[1]. Documentation/devicetree/bindings/power/domain-idle-state.yaml diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt index f5cdac8b2847..8b005192f6e8 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt @@ -161,7 +161,7 @@ The regulator node houses sub-nodes for each regulator within the device. Each sub-node is identified using the node's name, with valid values listed for each of the PMICs below. -pm8005: +pm8004: s2, s5 pm8005: diff --git a/Documentation/devicetree/bindings/regulator/regulator.yaml b/Documentation/devicetree/bindings/regulator/regulator.yaml index 92ff2e8ad572..91a39a33000b 100644 --- a/Documentation/devicetree/bindings/regulator/regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/regulator.yaml @@ -191,7 +191,7 @@ patternProperties: examples: - | - xyzreg: regulator@0 { + xyzreg: regulator { regulator-min-microvolt = <1000000>; regulator-max-microvolt = <2500000>; regulator-always-on; diff --git a/Documentation/devicetree/bindings/reset/intel,rcu-gw.yaml b/Documentation/devicetree/bindings/reset/intel,rcu-gw.yaml index 246dea8a2ec9..8ac437282659 100644 --- a/Documentation/devicetree/bindings/reset/intel,rcu-gw.yaml +++ b/Documentation/devicetree/bindings/reset/intel,rcu-gw.yaml @@ -23,7 +23,11 @@ properties: description: Global reset register offset and bit offset. allOf: - $ref: /schemas/types.yaml#/definitions/uint32-array - - maxItems: 2 + items: + - description: Register offset + - description: Register bit offset + minimum: 0 + maximum: 31 "#reset-cells": minimum: 2 diff --git a/Documentation/devicetree/bindings/reset/st,stm32mp1-rcc.txt b/Documentation/devicetree/bindings/reset/st,stm32mp1-rcc.txt index b4edaf7c7ff3..2880d5dda95e 100644 --- a/Documentation/devicetree/bindings/reset/st,stm32mp1-rcc.txt +++ b/Documentation/devicetree/bindings/reset/st,stm32mp1-rcc.txt @@ -3,4 +3,4 @@ STMicroelectronics STM32MP1 Peripheral Reset Controller The RCC IP is both a reset and a clock controller. -Please see Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt +Please see Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.txt b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt index 944743dd9212..c42b91e525fa 100644 --- a/Documentation/devicetree/bindings/sound/st,stm32-sai.txt +++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt @@ -36,7 +36,7 @@ SAI subnodes required properties: - clock-names: Must contain "sai_ck". Must also contain "MCLK", if SAI shares a master clock, with a SAI set as MCLK clock provider. - - dmas: see Documentation/devicetree/bindings/dma/stm32-dma.txt + - dmas: see Documentation/devicetree/bindings/dma/st,stm32-dma.yaml - dma-names: identifier string for each DMA request line "tx": if sai sub-block is configured as playback DAI "rx": if sai sub-block is configured as capture DAI diff --git a/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.txt b/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.txt index 33826f2459fa..ca9101777c44 100644 --- a/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.txt +++ b/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.txt @@ -10,7 +10,7 @@ Required properties: - clock-names: must contain "kclk" - interrupts: cpu DAI interrupt line - dmas: DMA specifiers for audio data DMA and iec control flow DMA - See STM32 DMA bindings, Documentation/devicetree/bindings/dma/stm32-dma.txt + See STM32 DMA bindings, Documentation/devicetree/bindings/dma/st,stm32-dma.yaml - dma-names: two dmas have to be defined, "rx" and "rx-ctrl" Optional properties: diff --git a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml index f0d979664f07..e49ecbf715ba 100644 --- a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml +++ b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml @@ -49,7 +49,7 @@ properties: dmas: description: | DMA specifiers for tx and rx dma. DMA fifo mode must be used. See - the STM32 DMA bindings Documentation/devicetree/bindings/dma/stm32-dma.txt. + the STM32 DMA bindings Documentation/devicetree/bindings/dma/st,stm32-dma.yaml. items: - description: rx DMA channel - description: tx DMA channel diff --git a/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml b/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml index 80bac7a182d5..4b5509436588 100644 --- a/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml +++ b/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml @@ -125,7 +125,7 @@ examples: #size-cells = <1>; ranges; - sram_a: sram@00000000 { + sram_a: sram@0 { compatible = "mmio-sram"; reg = <0x00000000 0xc000>; #address-cells = <1>; diff --git a/Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml b/Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml index d9fdf4809a49..f3e68ed03abf 100644 --- a/Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml @@ -17,7 +17,7 @@ description: |+ "brcm,bcm2711-avs-monitor", "syscon", "simple-mfd" Refer to the the bindings described in - Documentation/devicetree/bindings/mfd/syscon.txt + Documentation/devicetree/bindings/mfd/syscon.yaml properties: compatible: diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml index 23e989e09766..d918cee100ac 100644 --- a/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml +++ b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml @@ -87,7 +87,7 @@ additionalProperties: false examples: - | - timer { + timer@1c20c00 { compatible = "allwinner,sun4i-a10-timer"; reg = <0x01c20c00 0x400>; interrupts = <22>, diff --git a/Documentation/driver-api/dmaengine/client.rst b/Documentation/driver-api/dmaengine/client.rst index e5953e7e4bf4..2104830a99ae 100644 --- a/Documentation/driver-api/dmaengine/client.rst +++ b/Documentation/driver-api/dmaengine/client.rst @@ -151,8 +151,8 @@ The details of these operations are: Note that callbacks will always be invoked from the DMA engines tasklet, never from interrupt context. -Optional: per descriptor metadata ---------------------------------- + **Optional: per descriptor metadata** + DMAengine provides two ways for metadata support. DESC_METADATA_CLIENT @@ -199,12 +199,15 @@ Optional: per descriptor metadata DESC_METADATA_CLIENT - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM: + 1. prepare the descriptor (dmaengine_prep_*) construct the metadata in the client's buffer 2. use dmaengine_desc_attach_metadata() to attach the buffer to the descriptor 3. submit the transfer + - DMA_DEV_TO_MEM: + 1. prepare the descriptor (dmaengine_prep_*) 2. use dmaengine_desc_attach_metadata() to attach the buffer to the descriptor @@ -215,6 +218,7 @@ Optional: per descriptor metadata DESC_METADATA_ENGINE - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM: + 1. prepare the descriptor (dmaengine_prep_*) 2. use dmaengine_desc_get_metadata_ptr() to get the pointer to the engine's metadata area @@ -222,7 +226,9 @@ Optional: per descriptor metadata 4. use dmaengine_desc_set_metadata_len() to tell the DMA engine the amount of data the client has placed into the metadata buffer 5. submit the transfer + - DMA_DEV_TO_MEM: + 1. prepare the descriptor (dmaengine_prep_*) 2. submit the transfer 3. on transfer completion, use dmaengine_desc_get_metadata_ptr() to get @@ -278,8 +284,8 @@ Optional: per descriptor metadata void dma_async_issue_pending(struct dma_chan *chan); -Further APIs: -------------- +Further APIs +------------ 1. Terminate APIs diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt index dc497b96fa4f..55336a47a110 100644 --- a/Documentation/filesystems/debugfs.txt +++ b/Documentation/filesystems/debugfs.txt @@ -164,9 +164,9 @@ file. void __iomem *base; }; - struct dentry *debugfs_create_regset32(const char *name, umode_t mode, - struct dentry *parent, - struct debugfs_regset32 *regset); + debugfs_create_regset32(const char *name, umode_t mode, + struct dentry *parent, + struct debugfs_regset32 *regset); void debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs, int nregs, void __iomem *base, char *prefix); diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index f18506083ced..26c093969573 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -850,3 +850,11 @@ business doing so. d_alloc_pseudo() is internal-only; uses outside of alloc_file_pseudo() are very suspect (and won't work in modules). Such uses are very likely to be misspelled d_alloc_anon(). + +--- + +**mandatory** + +[should've been added in 2016] stale comment in finish_open() nonwithstanding, +failure exits in ->atomic_open() instances should *NOT* fput() the file, +no matter what. Everything is handled by the caller. diff --git a/Documentation/hwmon/adm1177.rst b/Documentation/hwmon/adm1177.rst index c81e0b4abd28..471be1e98d6f 100644 --- a/Documentation/hwmon/adm1177.rst +++ b/Documentation/hwmon/adm1177.rst @@ -20,8 +20,7 @@ Usage Notes ----------- This driver does not auto-detect devices. You will have to instantiate the -devices explicitly. Please see Documentation/i2c/instantiating-devices for -details. +devices explicitly. Please see :doc:`/i2c/instantiating-devices` for details. Sysfs entries diff --git a/Documentation/networking/6lowpan.txt b/Documentation/networking/6lowpan.rst index 2e5a939d7e6f..e70a6520cc33 100644 --- a/Documentation/networking/6lowpan.txt +++ b/Documentation/networking/6lowpan.rst @@ -1,37 +1,40 @@ +.. SPDX-License-Identifier: GPL-2.0 -Netdev private dataroom for 6lowpan interfaces: +============================================== +Netdev private dataroom for 6lowpan interfaces +============================================== All 6lowpan able net devices, means all interfaces with ARPHRD_6LOWPAN, must have "struct lowpan_priv" placed at beginning of netdev_priv. -The priv_size of each interface should be calculate by: +The priv_size of each interface should be calculate by:: dev->priv_size = LOWPAN_PRIV_SIZE(LL_6LOWPAN_PRIV_DATA); Where LL_PRIV_6LOWPAN_DATA is sizeof linklayer 6lowpan private data struct. -To access the LL_PRIV_6LOWPAN_DATA structure you can cast: +To access the LL_PRIV_6LOWPAN_DATA structure you can cast:: lowpan_priv(dev)-priv; to your LL_6LOWPAN_PRIV_DATA structure. -Before registering the lowpan netdev interface you must run: +Before registering the lowpan netdev interface you must run:: lowpan_netdev_setup(dev, LOWPAN_LLTYPE_FOOBAR); wheres LOWPAN_LLTYPE_FOOBAR is a define for your 6LoWPAN linklayer type of enum lowpan_lltypes. -Example to evaluate the private usually you can do: +Example to evaluate the private usually you can do:: -static inline struct lowpan_priv_foobar * -lowpan_foobar_priv(struct net_device *dev) -{ + static inline struct lowpan_priv_foobar * + lowpan_foobar_priv(struct net_device *dev) + { return (struct lowpan_priv_foobar *)lowpan_priv(dev)->priv; -} + } -switch (dev->type) { -case ARPHRD_6LOWPAN: + switch (dev->type) { + case ARPHRD_6LOWPAN: lowpan_priv = lowpan_priv(dev); /* do great stuff which is ARPHRD_6LOWPAN related */ switch (lowpan_priv->lltype) { @@ -42,8 +45,8 @@ case ARPHRD_6LOWPAN: ... } break; -... -} + ... + } In case of generic 6lowpan branch ("net/6lowpan") you can remove the check on ARPHRD_6LOWPAN, because you can be sure that these function are called diff --git a/Documentation/networking/device_drivers/stmicro/stmmac.rst b/Documentation/networking/device_drivers/stmicro/stmmac.rst index c34bab3d2df0..5d46e5036129 100644 --- a/Documentation/networking/device_drivers/stmicro/stmmac.rst +++ b/Documentation/networking/device_drivers/stmicro/stmmac.rst @@ -32,7 +32,8 @@ is also supported. DesignWare(R) Cores Ethernet MAC 10/100/1000 Universal version 3.70a (and older) and DesignWare(R) Cores Ethernet Quality-of-Service version 4.0 (and upper) have been used for developing this driver as well as -DesignWare(R) Cores XGMAC - 10G Ethernet MAC. +DesignWare(R) Cores XGMAC - 10G Ethernet MAC and DesignWare(R) Cores +Enterprise MAC - 100G Ethernet MAC. This driver supports both the platform bus and PCI. @@ -48,6 +49,8 @@ Cores Ethernet Controllers and corresponding minimum and maximum versions: +-------------------------------+--------------+--------------+--------------+ | XGMAC - 10G Ethernet MAC | 2.10a | N/A | XGMAC2+ | +-------------------------------+--------------+--------------+--------------+ +| XLGMAC - 100G Ethernet MAC | 2.00a | N/A | XLGMAC2+ | ++-------------------------------+--------------+--------------+--------------+ For questions related to hardware requirements, refer to the documentation supplied with your Ethernet adapter. All hardware requirements listed apply @@ -57,7 +60,7 @@ Feature List ============ The following features are available in this driver: - - GMII/MII/RGMII/SGMII/RMII/XGMII Interface + - GMII/MII/RGMII/SGMII/RMII/XGMII/XLGMII Interface - Half-Duplex / Full-Duplex Operation - Energy Efficient Ethernet (EEE) - IEEE 802.3x PAUSE Packets (Flow Control) diff --git a/Documentation/networking/devlink/devlink-info.rst b/Documentation/networking/devlink/devlink-info.rst index 70981dd1b981..b701899b7f42 100644 --- a/Documentation/networking/devlink/devlink-info.rst +++ b/Documentation/networking/devlink/devlink-info.rst @@ -98,3 +98,8 @@ fw.roce RoCE firmware version which is responsible for handling roce management. + +fw.bundle_id +------------ + +Unique identifier of the entire firmware bundle. diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst index 1a7683e7acb2..8b46e8591fe0 100644 --- a/Documentation/networking/devlink/devlink-region.rst +++ b/Documentation/networking/devlink/devlink-region.rst @@ -40,9 +40,6 @@ example usage # Delete a snapshot using: $ devlink region del pci/0000:00:05.0/cr-space snapshot 1 - # Trigger (request) a snapshot be taken: - $ devlink region trigger pci/0000:00:05.0/cr-space - # Dump a snapshot: $ devlink region dump pci/0000:00:05.0/fw-health snapshot 1 0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30 diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst new file mode 100644 index 000000000000..37fbbd40a5e5 --- /dev/null +++ b/Documentation/networking/devlink/ice.rst @@ -0,0 +1,71 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================== +ice devlink support +=================== + +This document describes the devlink features implemented by the ``ice`` +device driver. + +Info versions +============= + +The ``ice`` driver reports the following versions + +.. list-table:: devlink info versions implemented + :widths: 5 5 5 90 + + * - Name + - Type + - Example + - Description + * - ``board.id`` + - fixed + - K65390-000 + - The Product Board Assembly (PBA) identifier of the board. + * - ``fw.mgmt`` + - running + - 2.1.7 + - 3-digit version number of the management firmware that controls the + PHY, link, etc. + * - ``fw.mgmt.api`` + - running + - 1.5 + - 2-digit version number of the API exported over the AdminQ by the + management firmware. Used by the driver to identify what commands + are supported. + * - ``fw.mgmt.build`` + - running + - 0x305d955f + - Unique identifier of the source for the management firmware. + * - ``fw.undi`` + - running + - 1.2581.0 + - Version of the Option ROM containing the UEFI driver. The version is + reported in ``major.minor.patch`` format. The major version is + incremented whenever a major breaking change occurs, or when the + minor version would overflow. The minor version is incremented for + non-breaking changes and reset to 1 when the major version is + incremented. The patch version is normally 0 but is incremented when + a fix is delivered as a patch against an older base Option ROM. + * - ``fw.psid.api`` + - running + - 0.80 + - Version defining the format of the flash contents. + * - ``fw.bundle_id`` + - running + - 0x80002ec0 + - Unique identifier of the firmware image file that was loaded onto + the device. Also referred to as the EETRACK identifier of the NVM. + * - ``fw.app.name`` + - running + - ICE OS Default Package + - The name of the DDP package that is active in the device. The DDP + package is loaded by the driver during initialization. Each varation + of DDP package shall have a unique name. + * - ``fw.app`` + - running + - 1.3.1.0 + - The version of the DDP package that is active in the device. Note + that both the name (as reported by ``fw.app.name``) and version are + required to uniquely identify the package. diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index 087ff54d53fc..272509cd9215 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -32,6 +32,7 @@ parameters, info versions, and other features it supports. bnxt ionic + ice mlx4 mlx5 mlxsw diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index f1f868479ceb..31a601cafa3f 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -189,6 +189,14 @@ Userspace to kernel: ``ETHTOOL_MSG_DEBUG_SET`` set debugging settings ``ETHTOOL_MSG_WOL_GET`` get wake-on-lan settings ``ETHTOOL_MSG_WOL_SET`` set wake-on-lan settings + ``ETHTOOL_MSG_FEATURES_GET`` get device features + ``ETHTOOL_MSG_FEATURES_SET`` set device features + ``ETHTOOL_MSG_PRIVFLAGS_GET`` get private flags + ``ETHTOOL_MSG_PRIVFLAGS_SET`` set private flags + ``ETHTOOL_MSG_RINGS_GET`` get ring sizes + ``ETHTOOL_MSG_RINGS_SET`` set ring sizes + ``ETHTOOL_MSG_CHANNELS_GET`` get channel counts + ``ETHTOOL_MSG_CHANNELS_SET`` set channel counts ===================================== ================================ Kernel to userspace: @@ -204,6 +212,15 @@ Kernel to userspace: ``ETHTOOL_MSG_DEBUG_NTF`` debugging settings notification ``ETHTOOL_MSG_WOL_GET_REPLY`` wake-on-lan settings ``ETHTOOL_MSG_WOL_NTF`` wake-on-lan settings notification + ``ETHTOOL_MSG_FEATURES_GET_REPLY`` device features + ``ETHTOOL_MSG_FEATURES_SET_REPLY`` optional reply to FEATURES_SET + ``ETHTOOL_MSG_FEATURES_NTF`` netdev features notification + ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY`` private flags + ``ETHTOOL_MSG_PRIVFLAGS_NTF`` private flags + ``ETHTOOL_MSG_RINGS_GET_REPLY`` ring sizes + ``ETHTOOL_MSG_RINGS_NTF`` ring sizes + ``ETHTOOL_MSG_CHANNELS_GET_REPLY`` channel counts + ``ETHTOOL_MSG_CHANNELS_NTF`` channel counts ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -521,6 +538,213 @@ Request contents: ``WAKE_MAGICSECURE`` mode. +FEATURES_GET +============ + +Gets netdev features like ``ETHTOOL_GFEATURES`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested reply header + ``ETHTOOL_A_FEATURES_HW`` bitset dev->hw_features + ``ETHTOOL_A_FEATURES_WANTED`` bitset dev->wanted_features + ``ETHTOOL_A_FEATURES_ACTIVE`` bitset dev->features + ``ETHTOOL_A_FEATURES_NOCHANGE`` bitset NETIF_F_NEVER_CHANGE + ==================================== ====== ========================== + +Bitmaps in kernel response have the same meaning as bitmaps used in ioctl +interference but attribute names are different (they are based on +corresponding members of struct net_device). Legacy "flags" are not provided, +if userspace needs them (most likely only ethtool for backward compatibility), +it can calculate their values from related feature bits itself. +ETHA_FEATURES_HW uses mask consisting of all features recognized by kernel (to +provide all names when using verbose bitmap format), the other three use no +mask (simple bit lists). + + +FEATURES_SET +============ + +Request to set netdev features like ``ETHTOOL_SFEATURES`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested request header + ``ETHTOOL_A_FEATURES_WANTED`` bitset requested features + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested reply header + ``ETHTOOL_A_FEATURES_WANTED`` bitset diff wanted vs. result + ``ETHTOOL_A_FEATURES_ACTIVE`` bitset diff old vs. new active + ==================================== ====== ========================== + +Request constains only one bitset which can be either value/mask pair (request +to change specific feature bits and leave the rest) or only a value (request +to set all features to specified set). + +As request is subject to netdev_change_features() sanity checks, optional +kernel reply (can be suppressed by ``ETHTOOL_FLAG_OMIT_REPLY`` flag in request +header) informs client about the actual result. ``ETHTOOL_A_FEATURES_WANTED`` +reports the difference between client request and actual result: mask consists +of bits which differ between requested features and result (dev->features +after the operation), value consists of values of these bits in the request +(i.e. negated values from resulting features). ``ETHTOOL_A_FEATURES_ACTIVE`` +reports the difference between old and new dev->features: mask consists of +bits which have changed, values are their values in new dev->features (after +the operation). + +``ETHTOOL_MSG_FEATURES_NTF`` notification is sent not only if device features +are modified using ``ETHTOOL_MSG_FEATURES_SET`` request or on of ethtool ioctl +request but also each time features are modified with netdev_update_features() +or netdev_change_features(). + + +PRIVFLAGS_GET +============= + +Gets private flags like ``ETHTOOL_GPFLAGS`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested reply header + ``ETHTOOL_A_PRIVFLAGS_FLAGS`` bitset private flags + ==================================== ====== ========================== + +``ETHTOOL_A_PRIVFLAGS_FLAGS`` is a bitset with values of device private flags. +These flags are defined by driver, their number and names (and also meaning) +are device dependent. For compact bitset format, names can be retrieved as +``ETH_SS_PRIV_FLAGS`` string set. If verbose bitset format is requested, +response uses all private flags supported by the device as mask so that client +gets the full information without having to fetch the string set with names. + + +PRIVFLAGS_SET +============= + +Sets or modifies values of device private flags like ``ETHTOOL_SPFLAGS`` +ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested request header + ``ETHTOOL_A_PRIVFLAGS_FLAGS`` bitset private flags + ==================================== ====== ========================== + +``ETHTOOL_A_PRIVFLAGS_FLAGS`` can either set the whole set of private flags or +modify only values of some of them. + + +RINGS_GET +========= + +Gets ring sizes like ``ETHTOOL_GRINGPARAM`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested reply header + ``ETHTOOL_A_RINGS_RX_MAX`` u32 max size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI_MAX`` u32 max size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO_MAX`` u32 max size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX_MAX`` u32 max size of TX ring + ``ETHTOOL_A_RINGS_RX`` u32 size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring + ==================================== ====== ========================== + + +RINGS_SET +========= + +Sets ring sizes like ``ETHTOOL_SRINGPARAM`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested reply header + ``ETHTOOL_A_RINGS_RX`` u32 size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring + ==================================== ====== ========================== + +Kernel checks that requested ring sizes do not exceed limits reported by +driver. Driver may impose additional constraints and may not suspport all +attributes. + + +CHANNELS_GET +============ + +Gets channel counts like ``ETHTOOL_GCHANNELS`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested reply header + ``ETHTOOL_A_CHANNELS_RX_MAX`` u32 max receive channels + ``ETHTOOL_A_CHANNELS_TX_MAX`` u32 max transmit channels + ``ETHTOOL_A_CHANNELS_OTHER_MAX`` u32 max other channels + ``ETHTOOL_A_CHANNELS_COMBINED_MAX`` u32 max combined channels + ``ETHTOOL_A_CHANNELS_RX_COUNT`` u32 receive channel count + ``ETHTOOL_A_CHANNELS_TX_COUNT`` u32 transmit channel count + ``ETHTOOL_A_CHANNELS_OTHER_COUNT`` u32 other channel count + ``ETHTOOL_A_CHANNELS_COMBINED_COUNT`` u32 combined channel count + ===================================== ====== ========================== + + +CHANNELS_SET +============ + +Sets channel counts like ``ETHTOOL_SCHANNELS`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested request header + ``ETHTOOL_A_CHANNELS_RX_COUNT`` u32 receive channel count + ``ETHTOOL_A_CHANNELS_TX_COUNT`` u32 transmit channel count + ``ETHTOOL_A_CHANNELS_OTHER_COUNT`` u32 other channel count + ``ETHTOOL_A_CHANNELS_COMBINED_COUNT`` u32 combined channel count + ===================================== ====== ========================== + +Kernel checks that requested channel counts do not exceed limits reported by +driver. Driver may impose additional constraints and may not suspport all +attributes. + + Request translation =================== @@ -547,35 +771,35 @@ have their netlink replacement yet. ``ETHTOOL_SEEPROM`` n/a ``ETHTOOL_GCOALESCE`` n/a ``ETHTOOL_SCOALESCE`` n/a - ``ETHTOOL_GRINGPARAM`` n/a - ``ETHTOOL_SRINGPARAM`` n/a + ``ETHTOOL_GRINGPARAM`` ``ETHTOOL_MSG_RINGS_GET`` + ``ETHTOOL_SRINGPARAM`` ``ETHTOOL_MSG_RINGS_SET`` ``ETHTOOL_GPAUSEPARAM`` n/a ``ETHTOOL_SPAUSEPARAM`` n/a - ``ETHTOOL_GRXCSUM`` n/a - ``ETHTOOL_SRXCSUM`` n/a - ``ETHTOOL_GTXCSUM`` n/a - ``ETHTOOL_STXCSUM`` n/a - ``ETHTOOL_GSG`` n/a - ``ETHTOOL_SSG`` n/a + ``ETHTOOL_GRXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` + ``ETHTOOL_SRXCSUM`` ``ETHTOOL_MSG_FEATURES_SET`` + ``ETHTOOL_GTXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` + ``ETHTOOL_STXCSUM`` ``ETHTOOL_MSG_FEATURES_SET`` + ``ETHTOOL_GSG`` ``ETHTOOL_MSG_FEATURES_GET`` + ``ETHTOOL_SSG`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_TEST`` n/a ``ETHTOOL_GSTRINGS`` ``ETHTOOL_MSG_STRSET_GET`` ``ETHTOOL_PHYS_ID`` n/a ``ETHTOOL_GSTATS`` n/a - ``ETHTOOL_GTSO`` n/a - ``ETHTOOL_STSO`` n/a + ``ETHTOOL_GTSO`` ``ETHTOOL_MSG_FEATURES_GET`` + ``ETHTOOL_STSO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GPERMADDR`` rtnetlink ``RTM_GETLINK`` - ``ETHTOOL_GUFO`` n/a - ``ETHTOOL_SUFO`` n/a - ``ETHTOOL_GGSO`` n/a - ``ETHTOOL_SGSO`` n/a - ``ETHTOOL_GFLAGS`` n/a - ``ETHTOOL_SFLAGS`` n/a - ``ETHTOOL_GPFLAGS`` n/a - ``ETHTOOL_SPFLAGS`` n/a + ``ETHTOOL_GUFO`` ``ETHTOOL_MSG_FEATURES_GET`` + ``ETHTOOL_SUFO`` ``ETHTOOL_MSG_FEATURES_SET`` + ``ETHTOOL_GGSO`` ``ETHTOOL_MSG_FEATURES_GET`` + ``ETHTOOL_SGSO`` ``ETHTOOL_MSG_FEATURES_SET`` + ``ETHTOOL_GFLAGS`` ``ETHTOOL_MSG_FEATURES_GET`` + ``ETHTOOL_SFLAGS`` ``ETHTOOL_MSG_FEATURES_SET`` + ``ETHTOOL_GPFLAGS`` ``ETHTOOL_MSG_PRIVFLAGS_GET`` + ``ETHTOOL_SPFLAGS`` ``ETHTOOL_MSG_PRIVFLAGS_SET`` ``ETHTOOL_GRXFH`` n/a ``ETHTOOL_SRXFH`` n/a - ``ETHTOOL_GGRO`` n/a - ``ETHTOOL_SGRO`` n/a + ``ETHTOOL_GGRO`` ``ETHTOOL_MSG_FEATURES_GET`` + ``ETHTOOL_SGRO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GRXRINGS`` n/a ``ETHTOOL_GRXCLSRLCNT`` n/a ``ETHTOOL_GRXCLSRULE`` n/a @@ -589,10 +813,10 @@ have their netlink replacement yet. ``ETHTOOL_GSSET_INFO`` ``ETHTOOL_MSG_STRSET_GET`` ``ETHTOOL_GRXFHINDIR`` n/a ``ETHTOOL_SRXFHINDIR`` n/a - ``ETHTOOL_GFEATURES`` n/a - ``ETHTOOL_SFEATURES`` n/a - ``ETHTOOL_GCHANNELS`` n/a - ``ETHTOOL_SCHANNELS`` n/a + ``ETHTOOL_GFEATURES`` ``ETHTOOL_MSG_FEATURES_GET`` + ``ETHTOOL_SFEATURES`` ``ETHTOOL_MSG_FEATURES_SET`` + ``ETHTOOL_GCHANNELS`` ``ETHTOOL_MSG_CHANNELS_GET`` + ``ETHTOOL_SCHANNELS`` ``ETHTOOL_MSG_CHANNELS_SET`` ``ETHTOOL_SET_DUMP`` n/a ``ETHTOOL_GET_DUMP_FLAG`` n/a ``ETHTOOL_GET_DUMP_DATA`` n/a diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index c4a328f2d57a..2f0f8b17dade 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -606,7 +606,7 @@ before a conversion to the new layout is being done behind the scenes! Currently, the classic BPF format is being used for JITing on most 32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64, -sparc64, arm32, riscv (RV64G) perform JIT compilation from eBPF +sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF instruction set. Some core changes of the new internal format: diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 3a83cfb66704..50133d9761c9 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -34,6 +34,7 @@ Contents: tls tls-offload nfc + 6lowpan .. only:: subproject and html diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5f53faff4e25..ee961d322d93 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -958,6 +958,15 @@ ip_nonlocal_bind - BOOLEAN which can be quite useful - but may break some applications. Default: 0 +ip_autobind_reuse - BOOLEAN + By default, bind() does not select the ports automatically even if + the new socket and all sockets bound to the port have SO_REUSEADDR. + ip_autobind_reuse allows bind() to reuse the port and this is useful + when you use bind()+connect(), but may break some applications. + The preferred solution is to use IP_BIND_ADDRESS_NO_PORT and this + option should only be set by experts. + Default: 0 + ip_dynaddr - BOOLEAN If set non-zero, enables support for dynamic addresses. If set to a non-zero value larger than 1, a kernel log diff --git a/Documentation/networking/net_failover.rst b/Documentation/networking/net_failover.rst index 06c97dcb57ca..e143ab79a960 100644 --- a/Documentation/networking/net_failover.rst +++ b/Documentation/networking/net_failover.rst @@ -8,9 +8,9 @@ Overview ======== The net_failover driver provides an automated failover mechanism via APIs -to create and destroy a failover master netdev and mananges a primary and +to create and destroy a failover master netdev and manages a primary and standby slave netdevs that get registered via the generic failover -infrastructrure. +infrastructure. The failover netdev acts a master device and controls 2 slave devices. The original paravirtual interface is registered as 'standby' slave netdev and @@ -29,7 +29,7 @@ virtio-net accelerated datapath: STANDBY mode ============================================= net_failover enables hypervisor controlled accelerated datapath to virtio-net -enabled VMs in a transparent manner with no/minimal guest userspace chanages. +enabled VMs in a transparent manner with no/minimal guest userspace changes. To support this, the hypervisor needs to enable VIRTIO_NET_F_STANDBY feature on the virtio-net interface and assign the same MAC address to both diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt index f2a0147c933d..eec61694e894 100644 --- a/Documentation/networking/rds.txt +++ b/Documentation/networking/rds.txt @@ -159,7 +159,7 @@ Socket Interface set SO_RDS_TRANSPORT on a socket for which the transport has been previously attached explicitly (by SO_RDS_TRANSPORT) or implicitly (via bind(2)) will return an error of EOPNOTSUPP. - An attempt to set SO_RDS_TRANSPPORT to RDS_TRANS_NONE will + An attempt to set SO_RDS_TRANSPORT to RDS_TRANS_NONE will always return EINVAL. RDMA for RDS diff --git a/Documentation/power/index.rst b/Documentation/power/index.rst index 002e42745263..ced8a8007434 100644 --- a/Documentation/power/index.rst +++ b/Documentation/power/index.rst @@ -13,7 +13,6 @@ Power Management drivers-testing energy-model freezing-of-tasks - interface opp pci pm_qos_interface diff --git a/MAINTAINERS b/MAINTAINERS index 6918c3f0ff1c..97dce264bc7c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -176,7 +176,7 @@ L: linux-wpan@vger.kernel.org S: Maintained F: net/6lowpan/ F: include/net/6lowpan.h -F: Documentation/networking/6lowpan.txt +F: Documentation/networking/6lowpan.rst 6PACK NETWORK DRIVER FOR AX.25 M: Andreas Koensgen <ajk@comnets.uni-bremen.de> @@ -693,7 +693,7 @@ ALLWINNER CPUFREQ DRIVER M: Yangtao Li <tiny.windzz@gmail.com> L: linux-pm@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt +F: Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml F: drivers/cpufreq/sun50i-cpufreq-nvmem.c ALLWINNER CRYPTO DRIVERS @@ -3213,11 +3213,22 @@ L: bpf@vger.kernel.org S: Maintained F: arch/powerpc/net/ -BPF JIT for RISC-V (RV64G) +BPF JIT for RISC-V (32-bit) +M: Luke Nelson <luke.r.nels@gmail.com> +M: Xi Wang <xi.wang@gmail.com> +L: netdev@vger.kernel.org +L: bpf@vger.kernel.org +S: Maintained +F: arch/riscv/net/ +X: arch/riscv/net/bpf_jit_comp64.c + +BPF JIT for RISC-V (64-bit) M: Björn Töpel <bjorn.topel@gmail.com> L: netdev@vger.kernel.org +L: bpf@vger.kernel.org S: Maintained F: arch/riscv/net/ +X: arch/riscv/net/bpf_jit_comp32.c BPF JIT for S390 M: Ilya Leoshkevich <iii@linux.ibm.com> @@ -4017,7 +4028,7 @@ M: Cheng-Yi Chiang <cychiang@chromium.org> S: Maintained R: Enric Balletbo i Serra <enric.balletbo@collabora.com> R: Guenter Roeck <groeck@chromium.org> -F: Documentation/devicetree/bindings/sound/google,cros-ec-codec.txt +F: Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml F: sound/soc/codecs/cros_ec_codec.* CIRRUS LOGIC AUDIO CODEC DRIVERS @@ -4073,7 +4084,6 @@ F: drivers/scsi/snic/ CISCO VIC ETHERNET NIC DRIVER M: Christian Benvenuti <benve@cisco.com> M: Govindarajulu Varadarajan <_govind@gmx.com> -M: Parvi Kaustubhi <pkaustub@cisco.com> S: Supported F: drivers/net/ethernet/cisco/enic/ @@ -4475,7 +4485,7 @@ L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/platform/sunxi/sun6i-csi/ -F: Documentation/devicetree/bindings/media/sun6i-csi.txt +F: Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml CW1200 WLAN driver M: Solomon Peachy <pizza@shaftnet.org> @@ -4572,7 +4582,7 @@ F: drivers/infiniband/hw/cxgb4/ F: include/uapi/rdma/cxgb4-abi.h CXGB4VF ETHERNET DRIVER (CXGB4VF) -M: Casey Leedom <leedom@chelsio.com> +M: Vishal Kulkarni <vishal@gmail.com> L: netdev@vger.kernel.org W: http://www.chelsio.com S: Supported @@ -5668,7 +5678,7 @@ L: dri-devel@lists.freedesktop.org T: git git://anongit.freedesktop.org/drm/drm-misc S: Maintained F: drivers/gpu/drm/stm -F: Documentation/devicetree/bindings/display/st,stm32-ltdc.txt +F: Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml DRM DRIVERS FOR TI LCDC M: Jyri Sarha <jsarha@ti.com> @@ -6198,7 +6208,6 @@ S: Supported F: drivers/scsi/be2iscsi/ Emulex 10Gbps NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER (be2net) -M: Sathya Perla <sathya.perla@broadcom.com> M: Ajit Khaparde <ajit.khaparde@broadcom.com> M: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com> M: Somnath Kotur <somnath.kotur@broadcom.com> @@ -7738,7 +7747,7 @@ Hyper-V CORE AND DRIVERS M: "K. Y. Srinivasan" <kys@microsoft.com> M: Haiyang Zhang <haiyangz@microsoft.com> M: Stephen Hemminger <sthemmin@microsoft.com> -M: Sasha Levin <sashal@kernel.org> +M: Wei Liu <wei.liu@kernel.org> T: git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git L: linux-hyperv@vger.kernel.org S: Supported @@ -9352,6 +9361,8 @@ F: include/net/l3mdev.h L7 BPF FRAMEWORK M: John Fastabend <john.fastabend@gmail.com> M: Daniel Borkmann <daniel@iogearbox.net> +M: Jakub Sitnicki <jakub@cloudflare.com> +M: Lorenz Bauer <lmb@cloudflare.com> L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained @@ -9359,6 +9370,7 @@ F: include/linux/skmsg.h F: net/core/skmsg.c F: net/core/sock_map.c F: net/ipv4/tcp_bpf.c +F: net/ipv4/udp_bpf.c LANTIQ / INTEL Ethernet drivers M: Hauke Mehrtens <hauke@hauke-m.de> @@ -10164,7 +10176,7 @@ MAXBOTIX ULTRASONIC RANGER IIO DRIVER M: Andreas Klinger <ak@it-klinger.de> L: linux-iio@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.txt +F: Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.yaml F: drivers/iio/proximity/mb1232.c MAXIM MAX77650 PMIC MFD DRIVER @@ -10467,7 +10479,7 @@ M: Hugues Fruchet <hugues.fruchet@st.com> L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git S: Supported -F: Documentation/devicetree/bindings/media/st,stm32-dcmi.txt +F: Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml F: drivers/media/platform/stm32/stm32-dcmi.c MEDIA DRIVERS FOR NVIDIA TEGRA - VDE @@ -11119,7 +11131,7 @@ M: Thomas Bogendoerfer <tsbogend@alpha.franken.de> L: linux-mips@vger.kernel.org W: http://www.linux-mips.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git -Q: http://patchwork.linux-mips.org/project/linux-mips/list/ +Q: https://patchwork.kernel.org/project/linux-mips/list/ S: Maintained F: Documentation/devicetree/bindings/mips/ F: Documentation/mips/ @@ -12739,7 +12751,7 @@ M: Tom Joseph <tjoseph@cadence.com> L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/cdns,*.txt -F: drivers/pci/controller/pcie-cadence* +F: drivers/pci/controller/cadence/ PCI DRIVER FOR FREESCALE LAYERSCAPE M: Minghuan Lian <minghuan.Lian@nxp.com> @@ -12952,7 +12964,6 @@ M: Robert Richter <rrichter@marvell.com> L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported -F: Documentation/devicetree/bindings/pci/pci-thunder-* F: drivers/pci/controller/pci-thunder-* PCIE DRIVER FOR HISILICON @@ -14233,7 +14244,7 @@ F: include/dt-bindings/reset/ F: include/linux/reset.h F: include/linux/reset/ F: include/linux/reset-controller.h -K: \b(?:devm_|of_)?reset_control(?:ler_[a-z]+|_[a-z_]+)?\b +K: \b(?:devm_|of_)?reset_control(?:ler_[a-z]+|_[a-z_]+)?\b RESTARTABLE SEQUENCES SUPPORT M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> @@ -15928,7 +15939,7 @@ F: drivers/*/stm32-*timer* F: drivers/pwm/pwm-stm32* F: include/linux/*/stm32-*tim* F: Documentation/ABI/testing/*timer-stm32 -F: Documentation/devicetree/bindings/*/stm32-*timer* +F: Documentation/devicetree/bindings/*/*stm32-*timer* F: Documentation/devicetree/bindings/pwm/pwm-stm32* STMMAC ETHERNET DRIVER @@ -16087,6 +16098,8 @@ SYNOPSYS DESIGNWARE 8250 UART DRIVER R: Andy Shevchenko <andriy.shevchenko@linux.intel.com> S: Maintained F: drivers/tty/serial/8250/8250_dw.c +F: drivers/tty/serial/8250/8250_dwlib.* +F: drivers/tty/serial/8250/8250_lpss.c SYNOPSYS DESIGNWARE APB GPIO DRIVER M: Hoan Tran <hoan@os.amperecomputing.com> @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc5 NAME = Kleptomaniac Octopus # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index 98de654b79b3..17fe351cdde0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -738,8 +738,9 @@ config HAVE_STACK_VALIDATION config HAVE_RELIABLE_STACKTRACE bool help - Architecture has a save_stack_trace_tsk_reliable() function which - only returns a stack trace if it can guarantee the trace is reliable. + Architecture has either save_stack_trace_tsk_reliable() or + arch_stack_walk_reliable() function which only returns a stack trace + if it can guarantee the trace is reliable. config HAVE_ARCH_HASH bool diff --git a/arch/arm/boot/dts/am437x-idk-evm.dts b/arch/arm/boot/dts/am437x-idk-evm.dts index f3ced6df0c9b..9f66f96d09c9 100644 --- a/arch/arm/boot/dts/am437x-idk-evm.dts +++ b/arch/arm/boot/dts/am437x-idk-evm.dts @@ -526,11 +526,11 @@ * Supply voltage supervisor on board will not allow opp50 so * disable it and set opp100 as suspend OPP. */ - opp50@300000000 { + opp50-300000000 { status = "disabled"; }; - opp100@600000000 { + opp100-600000000 { opp-suspend; }; }; diff --git a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts index 1b5a835f66bd..efea891b1a76 100644 --- a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts +++ b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts @@ -21,6 +21,7 @@ aliases { ethernet0 = &genet; + pcie0 = &pcie0; }; leds { @@ -31,6 +32,8 @@ pwr { label = "PWR"; gpios = <&expgpio 2 GPIO_ACTIVE_LOW>; + default-state = "keep"; + linux,default-trigger = "default-on"; }; }; diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts index 66ab35eccba7..28be0332c1c8 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts @@ -26,6 +26,8 @@ pwr { label = "PWR"; gpios = <&expgpio 2 GPIO_ACTIVE_LOW>; + default-state = "keep"; + linux,default-trigger = "default-on"; }; }; }; diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts index 74ed6d047807..37343148643d 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts @@ -27,6 +27,8 @@ pwr { label = "PWR"; gpios = <&expgpio 2 GPIO_ACTIVE_LOW>; + default-state = "keep"; + linux,default-trigger = "default-on"; }; }; diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index de7f85efaa51..af06a55d1c5c 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -61,10 +61,10 @@ regulator-max-microvolt = <1800000>; }; - evm_3v3: fixedregulator-evm3v3 { + vsys_3v3: fixedregulator-vsys3v3 { /* Output of Cntlr A of TPS43351-Q1 on dra7-evm */ compatible = "regulator-fixed"; - regulator-name = "evm_3v3"; + regulator-name = "vsys_3v3"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; vin-supply = <&evm_12v0>; diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index fc418834890d..2119a78e9c15 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -3474,6 +3474,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER13_CLKCTRL 24>; clock-names = "fck"; interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>; + ti,timer-pwm; }; }; @@ -3501,6 +3502,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER14_CLKCTRL 24>; clock-names = "fck"; interrupts = <GIC_SPI 340 IRQ_TYPE_LEVEL_HIGH>; + ti,timer-pwm; }; }; @@ -3528,6 +3530,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>; clock-names = "fck"; interrupts = <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>; + ti,timer-pwm; }; }; @@ -3555,6 +3558,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>; clock-names = "fck"; interrupts = <GIC_SPI 342 IRQ_TYPE_LEVEL_HIGH>; + ti,timer-pwm; }; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index d78b684e7fca..4305051bb769 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -184,6 +184,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x20013000 0x13000 0 0xffed000>; + dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>; bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; @@ -238,6 +239,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x30013000 0x13000 0 0xffed000>; + dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>; bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index 2f7539afef2b..42b8a205b64f 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -128,3 +128,8 @@ &usb4_tm { status = "disabled"; }; + +&mmc3 { + /* dra76x is not affected by i887 */ + max-frequency = <96000000>; +}; diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index 55cef4cac5f1..dc0a93bccbf1 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -796,16 +796,6 @@ clock-div = <1>; }; - ipu1_gfclk_mux: ipu1_gfclk_mux@520 { - #clock-cells = <0>; - compatible = "ti,mux-clock"; - clocks = <&dpll_abe_m2x2_ck>, <&dpll_core_h22x2_ck>; - ti,bit-shift = <24>; - reg = <0x0520>; - assigned-clocks = <&ipu1_gfclk_mux>; - assigned-clock-parents = <&dpll_core_h22x2_ck>; - }; - dummy_ck: dummy_ck { #clock-cells = <0>; compatible = "fixed-clock"; @@ -1564,6 +1554,8 @@ compatible = "ti,clkctrl"; reg = <0x20 0x4>; #clock-cells = <2>; + assigned-clocks = <&ipu1_clkctrl DRA7_IPU1_MMU_IPU1_CLKCTRL 24>; + assigned-clock-parents = <&dpll_core_h22x2_ck>; }; ipu_clkctrl: ipu-clkctrl@50 { diff --git a/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts b/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts index cd075621de52..84fcc203a2e4 100644 --- a/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts +++ b/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts @@ -275,7 +275,7 @@ /* SRAM on Colibri nEXT_CS0 */ sram@0,0 { - compatible = "cypress,cy7c1019dv33-10zsxi, mtd-ram"; + compatible = "cypress,cy7c1019dv33-10zsxi", "mtd-ram"; reg = <0 0 0x00010000>; #address-cells = <1>; #size-cells = <1>; @@ -286,7 +286,7 @@ /* SRAM on Colibri nEXT_CS1 */ sram@1,0 { - compatible = "cypress,cy7c1019dv33-10zsxi, mtd-ram"; + compatible = "cypress,cy7c1019dv33-10zsxi", "mtd-ram"; reg = <1 0 0x00010000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi index 978dc1c2ff1b..4d18952658f8 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi @@ -192,7 +192,6 @@ pinctrl-0 = <&pinctrl_usdhc4>; bus-width = <8>; non-removable; - vmmc-supply = <&vdd_emmc_1p8>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi index d05be3f0e2a7..04717cf69db0 100644 --- a/arch/arm/boot/dts/imx7-colibri.dtsi +++ b/arch/arm/boot/dts/imx7-colibri.dtsi @@ -336,7 +336,6 @@ assigned-clock-rates = <400000000>; bus-width = <8>; fsl,tuning-step = <2>; - max-frequency = <100000000>; vmmc-supply = <®_module_3v3>; vqmmc-supply = <®_DCDC3>; non-removable; diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index 92f6d0c2a74f..4c22828df55f 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -44,7 +44,7 @@ opp-hz = /bits/ 64 <792000000>; opp-microvolt = <1000000>; clock-latency-ns = <150000>; - opp-supported-hw = <0xd>, <0xf>; + opp-supported-hw = <0xd>, <0x7>; opp-suspend; }; @@ -52,7 +52,7 @@ opp-hz = /bits/ 64 <996000000>; opp-microvolt = <1100000>; clock-latency-ns = <150000>; - opp-supported-hw = <0xc>, <0xf>; + opp-supported-hw = <0xc>, <0x7>; opp-suspend; }; @@ -60,7 +60,7 @@ opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1225000>; clock-latency-ns = <150000>; - opp-supported-hw = <0x8>, <0xf>; + opp-supported-hw = <0x8>, <0x3>; opp-suspend; }; }; diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 0855b1fe98e0..760a68c163c8 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -747,7 +747,7 @@ }; mdio0: mdio@2d24000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; @@ -756,7 +756,7 @@ }; mdio1: mdio@2d64000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/motorola-mapphone-common.dtsi b/arch/arm/boot/dts/motorola-mapphone-common.dtsi index 85665506f4f8..b6e82b165f5c 100644 --- a/arch/arm/boot/dts/motorola-mapphone-common.dtsi +++ b/arch/arm/boot/dts/motorola-mapphone-common.dtsi @@ -182,6 +182,14 @@ pwm-names = "enable", "direction"; direction-duty-cycle-ns = <10000000>; }; + + backlight: backlight { + compatible = "led-backlight"; + + leds = <&backlight_led>; + brightness-levels = <31 63 95 127 159 191 223 255>; + default-brightness-level = <6>; + }; }; &dss { @@ -205,6 +213,8 @@ vddi-supply = <&lcd_regulator>; reset-gpios = <&gpio4 5 GPIO_ACTIVE_HIGH>; /* gpio101 */ + backlight = <&backlight>; + width-mm = <50>; height-mm = <89>; @@ -393,12 +403,11 @@ ramp-up-us = <1024>; ramp-down-us = <8193>; - led@0 { + backlight_led: led@0 { reg = <0>; led-sources = <2>; ti,led-mode = <0>; label = ":backlight"; - linux,default-trigger = "backlight"; }; led@1 { diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi index beb9885e6ffc..c0999e27e9b1 100644 --- a/arch/arm/boot/dts/r8a7779.dtsi +++ b/arch/arm/boot/dts/r8a7779.dtsi @@ -377,7 +377,7 @@ }; sata: sata@fc600000 { - compatible = "renesas,sata-r8a7779", "renesas,rcar-sata"; + compatible = "renesas,sata-r8a7779"; reg = <0xfc600000 0x200000>; interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; clocks = <&mstp1_clks R8A7779_CLK_SATA>; diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig index 519ff58e67b3..0afcae9f7cf8 100644 --- a/arch/arm/configs/bcm2835_defconfig +++ b/arch/arm/configs/bcm2835_defconfig @@ -178,6 +178,7 @@ CONFIG_SCHED_TRACER=y CONFIG_STACK_TRACER=y CONFIG_FUNCTION_PROFILER=y CONFIG_TEST_KSTRTOX=y +CONFIG_DEBUG_FS=y CONFIG_KGDB=y CONFIG_KGDB_KDB=y CONFIG_STRICT_DEVMEM=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index c32c338f7704..847f9874ccc4 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -375,6 +375,7 @@ CONFIG_BACKLIGHT_GENERIC=m CONFIG_BACKLIGHT_PWM=m CONFIG_BACKLIGHT_PANDORA=m CONFIG_BACKLIGHT_GPIO=m +CONFIG_BACKLIGHT_LED=m CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig index fe2e1e82e233..e73c97b0f5b0 100644 --- a/arch/arm/configs/socfpga_defconfig +++ b/arch/arm/configs/socfpga_defconfig @@ -157,6 +157,7 @@ CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set CONFIG_FUNCTION_TRACER=y diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index c3314b286a61..a827b4d60d38 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -392,9 +392,6 @@ static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} -static inline void kvm_arm_vhe_guest_enter(void) {} -static inline void kvm_arm_vhe_guest_exit(void) {} - #define KVM_BP_HARDEN_UNKNOWN -1 #define KVM_BP_HARDEN_WA_NEEDED 0 #define KVM_BP_HARDEN_NOT_REQUIRED 1 diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 35ff620537e6..03506ce46149 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -91,6 +91,8 @@ AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += suspend-imx6.o obj-$(CONFIG_SOC_IMX53) += suspend-imx53.o endif +AFLAGS_resume-imx6.o :=-Wa,-march=armv7-a +obj-$(CONFIG_SOC_IMX6) += resume-imx6.o obj-$(CONFIG_SOC_IMX6) += pm-imx6.o obj-$(CONFIG_SOC_IMX1) += mach-imx1.o diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index 912aeceb4ff8..5aa5796cff0e 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -109,17 +109,17 @@ void imx_cpu_die(unsigned int cpu); int imx_cpu_kill(unsigned int cpu); #ifdef CONFIG_SUSPEND -void v7_cpu_resume(void); void imx53_suspend(void __iomem *ocram_vbase); extern const u32 imx53_suspend_sz; void imx6_suspend(void __iomem *ocram_vbase); #else -static inline void v7_cpu_resume(void) {} static inline void imx53_suspend(void __iomem *ocram_vbase) {} static const u32 imx53_suspend_sz; static inline void imx6_suspend(void __iomem *ocram_vbase) {} #endif +void v7_cpu_resume(void); + void imx6_pm_ccm_init(const char *ccm_compat); void imx6q_pm_init(void); void imx6dl_pm_init(void); diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S new file mode 100644 index 000000000000..5bd1ba7ef15b --- /dev/null +++ b/arch/arm/mach-imx/resume-imx6.S @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hardware/cache-l2x0.h> +#include "hardware.h" + +/* + * The following code must assume it is running from physical address + * where absolute virtual addresses to the data section have to be + * turned into relative ones. + */ + +ENTRY(v7_cpu_resume) + bl v7_invalidate_l1 +#ifdef CONFIG_CACHE_L2X0 + bl l2c310_early_resume +#endif + b cpu_resume +ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index 062391ff13da..1eabf2d2834b 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -327,17 +327,3 @@ resume: ret lr ENDPROC(imx6_suspend) - -/* - * The following code must assume it is running from physical address - * where absolute virtual addresses to the data section have to be - * turned into relative ones. - */ - -ENTRY(v7_cpu_resume) - bl v7_invalidate_l1 -#ifdef CONFIG_CACHE_L2X0 - bl l2c310_early_resume -#endif - b cpu_resume -ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-meson/Kconfig b/arch/arm/mach-meson/Kconfig index 01f0f4b765e0..75034fe197e3 100644 --- a/arch/arm/mach-meson/Kconfig +++ b/arch/arm/mach-meson/Kconfig @@ -9,7 +9,6 @@ menuconfig ARCH_MESON select CACHE_L2X0 select PINCTRL select PINCTRL_MESON - select COMMON_CLK select HAVE_ARM_SCU if SMP select HAVE_ARM_TWD if SMP diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index e1135b9d67c6..5017a3be0ff0 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -16,7 +16,7 @@ hwmod-common = omap_hwmod.o omap_hwmod_reset.o \ clock-common = clock.o secure-common = omap-smc.o omap-secure.o -obj-$(CONFIG_ARCH_OMAP2) += $(omap-2-3-common) $(hwmod-common) $(secure-common) +obj-$(CONFIG_ARCH_OMAP2) += $(omap-2-3-common) $(hwmod-common) obj-$(CONFIG_ARCH_OMAP3) += $(omap-2-3-common) $(hwmod-common) $(secure-common) obj-$(CONFIG_ARCH_OMAP4) += $(hwmod-common) $(secure-common) obj-$(CONFIG_SOC_AM33XX) += $(hwmod-common) $(secure-common) diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index f28047233665..27608d1026cb 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -431,7 +431,6 @@ void __init omap2420_init_early(void) omap_hwmod_init_postsetup(); omap_clk_soc_init = omap2420_dt_clk_init; rate_table = omap2420_rate_table; - omap_secure_init(); } void __init omap2420_init_late(void) @@ -456,7 +455,6 @@ void __init omap2430_init_early(void) omap_hwmod_init_postsetup(); omap_clk_soc_init = omap2430_dt_clk_init; rate_table = omap2430_rate_table; - omap_secure_init(); } void __init omap2430_init_late(void) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts index f82f25c1a5f9..d5dc12878dfe 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts @@ -327,7 +327,7 @@ #size-cells = <0>; bus-width = <4>; - max-frequency = <50000000>; + max-frequency = <60000000>; non-removable; disable-wp; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts index a8bb3fa9fec9..cb1b48f5b8b1 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts @@ -593,6 +593,7 @@ compatible = "brcm,bcm43438-bt"; interrupt-parent = <&gpio_intc>; interrupts = <95 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "host-wakeup"; shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>; max-speed = <2000000>; clocks = <&wifi32k>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi index 6082ae022136..d237162a8744 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi @@ -20,6 +20,8 @@ }; &fman0 { + fsl,erratum-a050385; + /* these aliases provide the FMan ports mapping */ enet0: ethernet@e0000 { }; diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts index d3d26cca7d52..13460a360c6a 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts +++ b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts @@ -52,11 +52,6 @@ compatible = "ethernet-phy-ieee802.3-c22"; reg = <0>; }; - - ethphy1: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; }; }; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index e1d357eaad7c..d8c44d3ca15a 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -102,7 +102,7 @@ }; gmac0: ethernet@ff800000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff800000 0x2000>; interrupts = <0 90 4>; interrupt-names = "macirq"; @@ -118,7 +118,7 @@ }; gmac1: ethernet@ff802000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff802000 0x2000>; interrupts = <0 91 4>; interrupt-names = "macirq"; @@ -134,7 +134,7 @@ }; gmac2: ethernet@ff804000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff804000 0x2000>; interrupts = <0 92 4>; interrupt-names = "macirq"; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 58fd1c611849..d42302b8889b 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -675,17 +675,6 @@ interrupt-controller; #interrupt-cells = <2>; }; - - ipa_smp2p_out: ipa-ap-to-modem { - qcom,entry-name = "ipa"; - #qcom,smem-state-cells = <1>; - }; - - ipa_smp2p_in: ipa-modem-to-ap { - qcom,entry-name = "ipa"; - interrupt-controller; - #interrupt-cells = <2>; - }; }; smp2p-slpi { @@ -1446,46 +1435,6 @@ }; }; - ipa@1e40000 { - compatible = "qcom,sdm845-ipa"; - - modem-init; - modem-remoteproc = <&mss_pil>; - - reg = <0 0x1e40000 0 0x7000>, - <0 0x1e47000 0 0x2000>, - <0 0x1e04000 0 0x2c000>; - reg-names = "ipa-reg", - "ipa-shared", - "gsi"; - - interrupts-extended = - <&intc 0 311 IRQ_TYPE_EDGE_RISING>, - <&intc 0 432 IRQ_TYPE_LEVEL_HIGH>, - <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, - <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>; - interrupt-names = "ipa", - "gsi", - "ipa-clock-query", - "ipa-setup-ready"; - - clocks = <&rpmhcc RPMH_IPA_CLK>; - clock-names = "core"; - - interconnects = - <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_EBI1>, - <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>, - <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>; - interconnect-names = "memory", - "imem", - "config"; - - qcom,smem-states = <&ipa_smp2p_out 0>, - <&ipa_smp2p_out 1>; - qcom,smem-state-names = "ipa-clock-enabled-valid", - "ipa-clock-enabled"; - }; - tcsr_mutex_regs: syscon@1f40000 { compatible = "syscon"; reg = <0 0x01f40000 0 0x40000>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 905109f6814f..4db223dbc549 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -773,7 +773,7 @@ CONFIG_ARCH_R8A774A1=y CONFIG_ARCH_R8A774B1=y CONFIG_ARCH_R8A774C0=y CONFIG_ARCH_R8A7795=y -CONFIG_ARCH_R8A7796=y +CONFIG_ARCH_R8A77960=y CONFIG_ARCH_R8A77961=y CONFIG_ARCH_R8A77965=y CONFIG_ARCH_R8A77970=y diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 25fec4bde43a..a358e97572c1 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -32,7 +32,7 @@ static inline void gic_write_eoir(u32 irq) isb(); } -static inline void gic_write_dir(u32 irq) +static __always_inline void gic_write_dir(u32 irq) { write_sysreg_s(irq, SYS_ICC_DIR_EL1); isb(); diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 806e9dc2a852..a4d1b5f771f6 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -69,7 +69,7 @@ static inline int icache_is_aliasing(void) return test_bit(ICACHEF_ALIASING, &__icache_flags); } -static inline int icache_is_vpipt(void) +static __always_inline int icache_is_vpipt(void) { return test_bit(ICACHEF_VPIPT, &__icache_flags); } diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 665c78e0665a..e6cca3d4acf7 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -145,7 +145,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -static inline void __flush_icache_all(void) +static __always_inline void __flush_icache_all(void) { if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) return; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 92ef9539874a..2a746b99e937 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -435,13 +435,13 @@ cpuid_feature_extract_signed_field(u64 features, int field) return cpuid_feature_extract_signed_field_width(features, field, 4); } -static inline unsigned int __attribute_const__ +static __always_inline unsigned int __attribute_const__ cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width) { return (u64)(features << (64 - width - field)) >> (64 - width); } -static inline unsigned int __attribute_const__ +static __always_inline unsigned int __attribute_const__ cpuid_feature_extract_unsigned_field(u64 features, int field) { return cpuid_feature_extract_unsigned_field_width(features, field, 4); @@ -564,7 +564,7 @@ static inline bool system_supports_mixed_endian(void) return val == 0x1; } -static inline bool system_supports_fpsimd(void) +static __always_inline bool system_supports_fpsimd(void) { return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD); } @@ -575,13 +575,13 @@ static inline bool system_uses_ttbr0_pan(void) !cpus_have_const_cap(ARM64_HAS_PAN); } -static inline bool system_supports_sve(void) +static __always_inline bool system_supports_sve(void) { return IS_ENABLED(CONFIG_ARM64_SVE) && cpus_have_const_cap(ARM64_SVE); } -static inline bool system_supports_cnp(void) +static __always_inline bool system_supports_cnp(void) { return IS_ENABLED(CONFIG_ARM64_CNP) && cpus_have_const_cap(ARM64_HAS_CNP); diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 4e531f57147d..6facd1308e7c 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -34,7 +34,7 @@ static inline void __raw_writew(u16 val, volatile void __iomem *addr) } #define __raw_writel __raw_writel -static inline void __raw_writel(u32 val, volatile void __iomem *addr) +static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr) { asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr)); } @@ -69,7 +69,7 @@ static inline u16 __raw_readw(const volatile void __iomem *addr) } #define __raw_readl __raw_readl -static inline u32 __raw_readl(const volatile void __iomem *addr) +static __always_inline u32 __raw_readl(const volatile void __iomem *addr) { u32 val; asm volatile(ALTERNATIVE("ldr %w0, [%1]", diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 688c63412cc2..f658dda12364 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -36,7 +36,7 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu); void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); -static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) +static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { return !(vcpu->arch.hcr_el2 & HCR_RW); } @@ -127,7 +127,7 @@ static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) vcpu->arch.vsesr_el2 = vsesr; } -static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) +static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; } @@ -153,17 +153,17 @@ static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long *__vcpu_elr_el1(vcpu) = v; } -static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) +static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; } -static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) +static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) { return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT); } -static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) return kvm_condition_valid32(vcpu); @@ -181,13 +181,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) * coming from a read of ESR_EL2. Otherwise, it may give the wrong result on * AArch32 with banked registers. */ -static inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, +static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num]; } -static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, +static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, unsigned long val) { if (reg_num != 31) @@ -264,12 +264,12 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) return mode != PSR_MODE_EL0t; } -static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) +static __always_inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.esr_el2; } -static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) +static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); @@ -279,12 +279,12 @@ static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) return -1; } -static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu) +static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.far_el2; } -static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) +static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) { return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; } @@ -299,7 +299,7 @@ static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; } -static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); } @@ -319,17 +319,17 @@ static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); } -static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) +static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; } -static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); } -static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) || kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */ @@ -340,18 +340,18 @@ static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); } -static inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) +static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } /* This one is not specific to Data Abort */ -static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL); } -static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) +static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); } @@ -361,17 +361,17 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW; } -static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) +static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC; } -static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) +static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; } -static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { case FSC_SEA: @@ -390,7 +390,7 @@ static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) } } -static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) +static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); return ESR_ELx_SYS64_ISS_RT(esr); @@ -504,7 +504,7 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, return data; /* Leave LE untouched */ } -static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) +static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) { if (vcpu_mode_is_32bit(vcpu)) kvm_skip_instr32(vcpu, is_wide_instr); @@ -519,7 +519,7 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) * Skip an instruction which has been emulated at hyp while most guest sysregs * are live. */ -static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) +static __always_inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) { *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d87aa609d2b6..57fd46acd058 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -626,38 +626,6 @@ static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} #endif -static inline void kvm_arm_vhe_guest_enter(void) -{ - local_daif_mask(); - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - * - * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a - * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. - */ - pmr_sync(); -} - -static inline void kvm_arm_vhe_guest_exit(void) -{ - /* - * local_daif_restore() takes care to properly restore PSTATE.DAIF - * and the GIC PMR if the host is using IRQ priorities. - */ - local_daif_restore(DAIF_PROCCTX_NOIRQ); - - /* - * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. Make sure these changes take effect - * before running the host or additional guests. - */ - isb(); -} - #define KVM_BP_HARDEN_UNKNOWN -1 #define KVM_BP_HARDEN_WA_NEEDED 0 #define KVM_BP_HARDEN_NOT_REQUIRED 1 diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index a3a6a2ba9a63..fe57f60f06a8 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -47,6 +47,13 @@ #define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1) #define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1) +/* + * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the + * static inline can allow the compiler to out-of-line this. KVM always wants + * the macro version as its always inlined. + */ +#define __kvm_swab32(x) ___constant_swab32(x) + int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 53d846f1bfe7..785762860c63 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -93,7 +93,7 @@ void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void kvm_compute_layout(void); -static inline unsigned long __kern_hyp_va(unsigned long v) +static __always_inline unsigned long __kern_hyp_va(unsigned long v) { asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" "ror %0, %0, #1\n" @@ -473,6 +473,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, extern void *__kvm_bp_vect_base; extern int __kvm_harden_el2_vector_slot; +/* This is only called on a VHE system */ static inline void *kvm_get_hyp_vector(void) { struct bp_hardening_data *data = arm64_get_bp_hardening_data(); diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 0958ed6191aa..61fd26752adc 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -83,7 +83,7 @@ static inline bool is_kernel_in_hyp_mode(void) return read_sysreg(CurrentEL) == CurrentEL_EL2; } -static inline bool has_vhe(void) +static __always_inline bool has_vhe(void) { if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN)) return true; diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index dfe8dd172512..925086b46136 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -625,7 +625,7 @@ static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) } /* Switch to the guest for VHE systems running in EL2 */ -int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) +static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; @@ -678,7 +678,42 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) return exit_code; } -NOKPROBE_SYMBOL(kvm_vcpu_run_vhe); +NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); + +int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) +{ + int ret; + + local_daif_mask(); + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + * + * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. + */ + pmr_sync(); + + ret = __kvm_vcpu_run_vhe(vcpu); + + /* + * local_daif_restore() takes care to properly restore PSTATE.DAIF + * and the GIC PMR if the host is using IRQ priorities. + */ + local_daif_restore(DAIF_PROCCTX_NOIRQ); + + /* + * When we exit from the guest we change a number of CPU configuration + * parameters, such as traps. Make sure these changes take effect + * before running the host or additional guests. + */ + isb(); + + return ret; +} /* Switch to the guest for legacy non-VHE systems */ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 29ee1feba4eb..4f3a087e36d5 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -69,14 +69,14 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) u32 data = vcpu_get_reg(vcpu, rd); if (__is_be(vcpu)) { /* guest pre-swabbed data, undo this for writel() */ - data = swab32(data); + data = __kvm_swab32(data); } writel_relaxed(data, addr); } else { u32 data = readl_relaxed(addr); if (__is_be(vcpu)) { /* guest expects swabbed data */ - data = swab32(data); + data = __kvm_swab32(data); } vcpu_set_reg(vcpu, rd, data); } diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 8ef73e89d514..d89bb22589f6 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -260,14 +260,26 @@ asmlinkage void post_ttbr_update_workaround(void) CONFIG_CAVIUM_ERRATUM_27456)); } -static int asids_init(void) +static int asids_update_limit(void) { - asid_bits = get_cpu_asid_bits(); + unsigned long num_available_asids = NUM_USER_ASIDS; + + if (arm64_kernel_unmapped_at_el0()) + num_available_asids /= 2; /* * Expect allocation after rollover to fail if we don't have at least * one more ASID than CPUs. ASID #0 is reserved for init_mm. */ - WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus()); + WARN_ON(num_available_asids - 1 <= num_possible_cpus()); + pr_info("ASID allocator initialised with %lu entries\n", + num_available_asids); + return 0; +} +arch_initcall(asids_update_limit); + +static int asids_init(void) +{ + asid_bits = get_cpu_asid_bits(); atomic64_set(&asid_generation, ASID_FIRST_VERSION); asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), sizeof(*asid_map), GFP_KERNEL); @@ -282,8 +294,6 @@ static int asids_init(void) */ if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) set_kpti_asid_bits(); - - pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); return 0; } early_initcall(asids_init); diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 37b93166bf22..c340f947baa0 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -4,6 +4,8 @@ #include "jz4780.dtsi" #include <dt-bindings/clock/ingenic,tcu.h> #include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/interrupt-controller/irq.h> +#include <dt-bindings/regulator/active-semi,8865-regulator.h> / { compatible = "img,ci20", "ingenic,jz4780"; @@ -163,63 +165,71 @@ regulators { vddcore: SUDCDC1 { - regulator-name = "VDDCORE"; + regulator-name = "DCDC_REG1"; regulator-min-microvolt = <1100000>; regulator-max-microvolt = <1100000>; regulator-always-on; }; vddmem: SUDCDC2 { - regulator-name = "VDDMEM"; + regulator-name = "DCDC_REG2"; regulator-min-microvolt = <1500000>; regulator-max-microvolt = <1500000>; regulator-always-on; }; vcc_33: SUDCDC3 { - regulator-name = "VCC33"; + regulator-name = "DCDC_REG3"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; regulator-always-on; }; vcc_50: SUDCDC4 { - regulator-name = "VCC50"; + regulator-name = "SUDCDC_REG4"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; regulator-always-on; }; vcc_25: LDO_REG5 { - regulator-name = "VCC25"; + regulator-name = "LDO_REG5"; regulator-min-microvolt = <2500000>; regulator-max-microvolt = <2500000>; regulator-always-on; }; wifi_io: LDO_REG6 { - regulator-name = "WIFIIO"; + regulator-name = "LDO_REG6"; regulator-min-microvolt = <2500000>; regulator-max-microvolt = <2500000>; regulator-always-on; }; vcc_28: LDO_REG7 { - regulator-name = "VCC28"; + regulator-name = "LDO_REG7"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; regulator-always-on; }; vcc_15: LDO_REG8 { - regulator-name = "VCC15"; + regulator-name = "LDO_REG8"; regulator-min-microvolt = <1500000>; regulator-max-microvolt = <1500000>; regulator-always-on; }; - vcc_18: LDO_REG9 { - regulator-name = "VCC18"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; + vrtc_18: LDO_REG9 { + regulator-name = "LDO_REG9"; + /* Despite the datasheet stating 3.3V + * for REG9 and the driver expecting that, + * REG9 outputs 1.8V. + * Likely the CI20 uses a proprietary + * factory programmed chip variant. + * Since this is a simple on/off LDO the + * exact values do not matter. + */ + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vcc_11: LDO_REG10 { - regulator-name = "VCC11"; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1100000>; + regulator-name = "LDO_REG10"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; regulator-always-on; }; }; @@ -261,7 +271,9 @@ rtc@51 { compatible = "nxp,pcf8563"; reg = <0x51>; - interrupts = <110>; + + interrupt-parent = <&gpf>; + interrupts = <30 IRQ_TYPE_LEVEL_LOW>; }; }; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 1ac2752fb791..a7b469d89e2c 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -605,7 +605,8 @@ static void __init bootcmdline_init(char **cmdline_p) * If we're configured to take boot arguments from DT, look for those * now. */ - if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)) + if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB) || + IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND)) of_scan_flat_dt(bootcmdline_scan_chosen, &dt_bootargs); #endif diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index e745abc5457a..245be4fafe13 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2193,11 +2193,13 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, * oprofile_cpu_type already has a value, then we are * possibly overriding a real PVR with a logical one, * and, in that case, keep the current value for - * oprofile_cpu_type. + * oprofile_cpu_type. Futhermore, let's ensure that the + * fix for the PMAO bug is enabled on compatibility mode. */ if (old.oprofile_cpu_type != NULL) { t->oprofile_cpu_type = old.oprofile_cpu_type; t->oprofile_type = old.oprofile_type; + t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } } diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 2462cd7c565c..d0854320bb50 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -331,11 +331,13 @@ int hw_breakpoint_handler(struct die_args *args) } info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (!dar_within_range(regs->dar, info)) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - - if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info)) - goto out; + if (IS_ENABLED(CONFIG_PPC_8xx)) { + if (!dar_within_range(regs->dar, info)) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + } else { + if (!stepping_handler(regs, bp, info)) + goto out; + } /* * As a policy, the callback is invoked in a 'trigger-after-execute' diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b4c89a1acebb..a32d478a7f41 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -303,6 +303,12 @@ SECTIONS *(.branch_lt) } +#ifdef CONFIG_DEBUG_INFO_BTF + .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { + *(.BTF) + } +#endif + .opd : AT(ADDR(.opd) - LOAD_OFFSET) { __start_opd = .; KEEP(*(.opd)) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ef7b1119b2e2..1c07d5a3f543 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -373,7 +373,9 @@ static inline bool flush_coherent_icache(unsigned long addr) */ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { mb(); /* sync */ + allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); icbi((void *)addr); + prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); mb(); /* sync */ isync(); return true; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 73f029eae0cc..2366698df179 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -56,7 +56,7 @@ config RISCV select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MMIOWB select ARCH_HAS_DEBUG_VIRTUAL - select HAVE_EBPF_JIT if 64BIT + select HAVE_EBPF_JIT select EDAC_SUPPORT select ARCH_HAS_GIGANTIC_PAGE select ARCH_WANT_HUGE_PMD_SHARE if 64BIT @@ -121,6 +121,7 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y + depends on MMU select SPARSEMEM_VMEMMAP_ENABLE config ARCH_SELECT_MEMORY_MODEL diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index d325b67d00df..3078b2de0b2d 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -10,4 +10,28 @@ config SOC_SIFIVE help This enables support for SiFive SoC platform hardware. +config SOC_VIRT + bool "QEMU Virt Machine" + select VIRTIO_PCI + select VIRTIO_BALLOON + select VIRTIO_MMIO + select VIRTIO_CONSOLE + select VIRTIO_NET + select NET_9P_VIRTIO + select VIRTIO_BLK + select SCSI_VIRTIO + select DRM_VIRTIO_GPU + select HW_RANDOM_VIRTIO + select RPMSG_CHAR + select RPMSG_VIRTIO + select CRYPTO_DEV_VIRTIO + select VIRTIO_INPUT + select POWER_RESET_SYSCON + select POWER_RESET_SYSCON_POWEROFF + select GOLDFISH + select RTC_DRV_GOLDFISH + select SIFIVE_PLIC + help + This enables support for QEMU Virt Machine. + endmenu diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index b9009a2fbaf5..259cb53d7f20 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -13,8 +13,10 @@ LDFLAGS_vmlinux := ifeq ($(CONFIG_DYNAMIC_FTRACE),y) LDFLAGS_vmlinux := --no-relax endif -KBUILD_AFLAGS_MODULE += -fPIC -KBUILD_CFLAGS_MODULE += -fPIC + +ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy) +KBUILD_CFLAGS_MODULE += -mcmodel=medany +endif export BITS ifeq ($(CONFIG_ARCH_RV64I),y) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 609198cb1163..4a2729f5ca3f 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -2,6 +2,7 @@ /* Copyright (c) 2018-2019 SiFive, Inc */ #include "fu540-c000.dtsi" +#include <dt-bindings/gpio/gpio.h> /* Clock frequency (in Hz) of the PCB crystal for rtcclk */ #define RTCCLK_FREQ 1000000 @@ -41,6 +42,10 @@ clock-frequency = <RTCCLK_FREQ>; clock-output-names = "rtcclk"; }; + gpio-restart { + compatible = "gpio-restart"; + gpios = <&gpio 10 GPIO_ACTIVE_LOW>; + }; }; &uart0 { diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index e2ff95cb3390..c8f084203067 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -15,6 +15,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y CONFIG_SOC_SIFIVE=y +CONFIG_SOC_VIRT=y CONFIG_SMP=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -30,7 +31,6 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETLINK_DIAG=y CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -38,15 +38,12 @@ CONFIG_PCIE_XILINX=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y -CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -CONFIG_SCSI_VIRTIO=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y CONFIG_NETDEVICES=y -CONFIG_VIRTIO_NET=y CONFIG_MACB=y CONFIG_E1000E=y CONFIG_R8169=y @@ -57,15 +54,13 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_HVC_RISCV_SBI=y -CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y CONFIG_SPI_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set +CONFIG_POWER_RESET=y CONFIG_DRM=y CONFIG_DRM_RADEON=y -CONFIG_DRM_VIRTIO_GPU=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y @@ -78,12 +73,7 @@ CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y CONFIG_MMC=y CONFIG_MMC_SPI=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_INPUT=y -CONFIG_VIRTIO_MMIO=y -CONFIG_RPMSG_CHAR=y -CONFIG_RPMSG_VIRTIO=y +CONFIG_RTC_CLASS=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y @@ -98,7 +88,6 @@ CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y CONFIG_9P_FS=y CONFIG_CRYPTO_USER_API_HASH=y -CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_PAGEALLOC=y diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index eb519407c841..a844920a261f 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -14,6 +14,7 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_SOC_VIRT=y CONFIG_ARCH_RV32I=y CONFIG_SMP=y CONFIG_MODULES=y @@ -30,7 +31,6 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETLINK_DIAG=y CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -38,15 +38,12 @@ CONFIG_PCIE_XILINX=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y -CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -CONFIG_SCSI_VIRTIO=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y CONFIG_NETDEVICES=y -CONFIG_VIRTIO_NET=y CONFIG_MACB=y CONFIG_E1000E=y CONFIG_R8169=y @@ -57,13 +54,11 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_HVC_RISCV_SBI=y -CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_PTP_1588_CLOCK is not set +CONFIG_POWER_RESET=y CONFIG_DRM=y CONFIG_DRM_RADEON=y -CONFIG_DRM_VIRTIO_GPU=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y @@ -74,13 +69,7 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_INPUT=y -CONFIG_VIRTIO_MMIO=y -CONFIG_RPMSG_CHAR=y -CONFIG_RPMSG_VIRTIO=y -CONFIG_SIFIVE_PLIC=y +CONFIG_RTC_CLASS=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y @@ -95,7 +84,6 @@ CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y CONFIG_9P_FS=y CONFIG_CRYPTO_USER_API_HASH=y -CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_PAGEALLOC=y diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 42347d0981e7..49350c8bd7b0 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -28,13 +28,6 @@ static inline int syscall_get_nr(struct task_struct *task, return regs->a7; } -static inline void syscall_set_nr(struct task_struct *task, - struct pt_regs *regs, - int sysno) -{ - regs->a7 = sysno; -} - static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index bad4d85b5e91..208702d8c18e 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -229,19 +229,12 @@ check_syscall_nr: li t0, __NR_syscalls la s0, sys_ni_syscall /* - * The tracer can change syscall number to valid/invalid value. - * We use syscall_set_nr helper in syscall_trace_enter thus we - * cannot trust the current value in a7 and have to reload from - * the current task pt_regs. - */ - REG_L a7, PT_A7(sp) - /* * Syscall number held in a7. * If syscall number is above allowed value, redirect to ni_syscall. */ bge a7, t0, 1f /* - * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1. + * Check if syscall is rejected by tracer, i.e., a7 == -1. * If yes, we pretend it was executed. */ li t1, -1 @@ -334,6 +327,7 @@ work_resched: handle_syscall_trace_enter: move a0, sp call do_syscall_trace_enter + move t0, a0 REG_L a0, PT_A0(sp) REG_L a1, PT_A1(sp) REG_L a2, PT_A2(sp) @@ -342,6 +336,7 @@ handle_syscall_trace_enter: REG_L a5, PT_A5(sp) REG_L a6, PT_A6(sp) REG_L a7, PT_A7(sp) + bnez t0, ret_from_syscall_rejected j check_syscall_nr handle_syscall_trace_exit: move a0, sp diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index b7401858d872..8bbe5dbe1341 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -8,6 +8,10 @@ #include <linux/err.h> #include <linux/errno.h> #include <linux/moduleloader.h> +#include <linux/vmalloc.h> +#include <linux/sizes.h> +#include <asm/pgtable.h> +#include <asm/sections.h> static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { @@ -386,3 +390,15 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } + +#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) +#define VMALLOC_MODULE_START \ + max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START) +void *module_alloc(unsigned long size) +{ + return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START, + VMALLOC_END, GFP_KERNEL, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); +} +#endif diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 407464201b91..444dc7b0fd78 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -148,21 +148,19 @@ long arch_ptrace(struct task_struct *child, long request, * Allows PTRACE_SYSCALL to work. These are called from entry.S in * {handle,ret_from}_syscall. */ -__visible void do_syscall_trace_enter(struct pt_regs *regs) +__visible int do_syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) if (tracehook_report_syscall_entry(regs)) - syscall_set_nr(current, regs, -1); + return -1; /* * Do the secure computing after ptrace; failures should be fast. * If this fails we might have return value in a0 from seccomp * (via SECCOMP_RET_ERRNO/TRACE). */ - if (secure_computing() == -1) { - syscall_set_nr(current, regs, -1); - return; - } + if (secure_computing() == -1) + return -1; #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) @@ -170,6 +168,7 @@ __visible void do_syscall_trace_enter(struct pt_regs *regs) #endif audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3); + return 0; } __visible void do_syscall_trace_exit(struct pt_regs *regs) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 965a8cf4829c..fab855963c73 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -131,7 +131,7 @@ void __init setup_bootmem(void) for_each_memblock(memory, reg) { phys_addr_t end = reg->base + reg->size; - if (reg->base <= vmlinux_end && vmlinux_end <= end) { + if (reg->base <= vmlinux_start && vmlinux_end <= end) { mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET); /* diff --git a/arch/riscv/net/Makefile b/arch/riscv/net/Makefile index ec5b14763316..9a1e5f0a94e5 100644 --- a/arch/riscv/net/Makefile +++ b/arch/riscv/net/Makefile @@ -1,2 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o + +obj-$(CONFIG_BPF_JIT) += bpf_jit_core.o + +ifeq ($(CONFIG_ARCH_RV64I),y) + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o +else + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o +endif diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h new file mode 100644 index 000000000000..20e235d06f66 --- /dev/null +++ b/arch/riscv/net/bpf_jit.h @@ -0,0 +1,514 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common functionality for RV32 and RV64 BPF JIT compilers + * + * Copyright (c) 2019 Björn Töpel <bjorn.topel@gmail.com> + * + */ + +#ifndef _BPF_JIT_H +#define _BPF_JIT_H + +#include <linux/bpf.h> +#include <linux/filter.h> +#include <asm/cacheflush.h> + +enum { + RV_REG_ZERO = 0, /* The constant value 0 */ + RV_REG_RA = 1, /* Return address */ + RV_REG_SP = 2, /* Stack pointer */ + RV_REG_GP = 3, /* Global pointer */ + RV_REG_TP = 4, /* Thread pointer */ + RV_REG_T0 = 5, /* Temporaries */ + RV_REG_T1 = 6, + RV_REG_T2 = 7, + RV_REG_FP = 8, /* Saved register/frame pointer */ + RV_REG_S1 = 9, /* Saved register */ + RV_REG_A0 = 10, /* Function argument/return values */ + RV_REG_A1 = 11, /* Function arguments */ + RV_REG_A2 = 12, + RV_REG_A3 = 13, + RV_REG_A4 = 14, + RV_REG_A5 = 15, + RV_REG_A6 = 16, + RV_REG_A7 = 17, + RV_REG_S2 = 18, /* Saved registers */ + RV_REG_S3 = 19, + RV_REG_S4 = 20, + RV_REG_S5 = 21, + RV_REG_S6 = 22, + RV_REG_S7 = 23, + RV_REG_S8 = 24, + RV_REG_S9 = 25, + RV_REG_S10 = 26, + RV_REG_S11 = 27, + RV_REG_T3 = 28, /* Temporaries */ + RV_REG_T4 = 29, + RV_REG_T5 = 30, + RV_REG_T6 = 31, +}; + +struct rv_jit_context { + struct bpf_prog *prog; + u32 *insns; /* RV insns */ + int ninsns; + int epilogue_offset; + int *offset; /* BPF to RV */ + unsigned long flags; + int stack_size; +}; + +struct rv_jit_data { + struct bpf_binary_header *header; + u8 *image; + struct rv_jit_context ctx; +}; + +static inline void bpf_fill_ill_insns(void *area, unsigned int size) +{ + memset(area, 0, size); +} + +static inline void bpf_flush_icache(void *start, void *end) +{ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +static inline void emit(const u32 insn, struct rv_jit_context *ctx) +{ + if (ctx->insns) + ctx->insns[ctx->ninsns] = insn; + + ctx->ninsns++; +} + +static inline int epilogue_offset(struct rv_jit_context *ctx) +{ + int to = ctx->epilogue_offset, from = ctx->ninsns; + + return (to - from) << 2; +} + +/* Return -1 or inverted cond. */ +static inline int invert_bpf_cond(u8 cond) +{ + switch (cond) { + case BPF_JEQ: + return BPF_JNE; + case BPF_JGT: + return BPF_JLE; + case BPF_JLT: + return BPF_JGE; + case BPF_JGE: + return BPF_JLT; + case BPF_JLE: + return BPF_JGT; + case BPF_JNE: + return BPF_JEQ; + case BPF_JSGT: + return BPF_JSLE; + case BPF_JSLT: + return BPF_JSGE; + case BPF_JSGE: + return BPF_JSLT; + case BPF_JSLE: + return BPF_JSGT; + } + return -1; +} + +static inline bool is_12b_int(long val) +{ + return -(1L << 11) <= val && val < (1L << 11); +} + +static inline int is_12b_check(int off, int insn) +{ + if (!is_12b_int(off)) { + pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n", + insn, (int)off); + return -1; + } + return 0; +} + +static inline bool is_13b_int(long val) +{ + return -(1L << 12) <= val && val < (1L << 12); +} + +static inline bool is_21b_int(long val) +{ + return -(1L << 20) <= val && val < (1L << 20); +} + +static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx) +{ + int from, to; + + off++; /* BPF branch is from PC+1, RV is from PC */ + from = (insn > 0) ? ctx->offset[insn - 1] : 0; + to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; + return (to - from) << 2; +} + +/* Instruction formats. */ + +static inline u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd, + u8 opcode) +{ + return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | + (rd << 7) | opcode; +} + +static inline u32 rv_i_insn(u16 imm11_0, u8 rs1, u8 funct3, u8 rd, u8 opcode) +{ + return (imm11_0 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | + opcode; +} + +static inline u32 rv_s_insn(u16 imm11_0, u8 rs2, u8 rs1, u8 funct3, u8 opcode) +{ + u8 imm11_5 = imm11_0 >> 5, imm4_0 = imm11_0 & 0x1f; + + return (imm11_5 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | + (imm4_0 << 7) | opcode; +} + +static inline u32 rv_b_insn(u16 imm12_1, u8 rs2, u8 rs1, u8 funct3, u8 opcode) +{ + u8 imm12 = ((imm12_1 & 0x800) >> 5) | ((imm12_1 & 0x3f0) >> 4); + u8 imm4_1 = ((imm12_1 & 0xf) << 1) | ((imm12_1 & 0x400) >> 10); + + return (imm12 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | + (imm4_1 << 7) | opcode; +} + +static inline u32 rv_u_insn(u32 imm31_12, u8 rd, u8 opcode) +{ + return (imm31_12 << 12) | (rd << 7) | opcode; +} + +static inline u32 rv_j_insn(u32 imm20_1, u8 rd, u8 opcode) +{ + u32 imm; + + imm = (imm20_1 & 0x80000) | ((imm20_1 & 0x3ff) << 9) | + ((imm20_1 & 0x400) >> 2) | ((imm20_1 & 0x7f800) >> 11); + + return (imm << 12) | (rd << 7) | opcode; +} + +static inline u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1, + u8 funct3, u8 rd, u8 opcode) +{ + u8 funct7 = (funct5 << 2) | (aq << 1) | rl; + + return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode); +} + +/* Instructions shared by both RV32 and RV64. */ + +static inline u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 0, rd, 0x13); +} + +static inline u32 rv_andi(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 7, rd, 0x13); +} + +static inline u32 rv_ori(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 6, rd, 0x13); +} + +static inline u32 rv_xori(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 4, rd, 0x13); +} + +static inline u32 rv_slli(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 1, rd, 0x13); +} + +static inline u32 rv_srli(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 5, rd, 0x13); +} + +static inline u32 rv_srai(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x13); +} + +static inline u32 rv_lui(u8 rd, u32 imm31_12) +{ + return rv_u_insn(imm31_12, rd, 0x37); +} + +static inline u32 rv_auipc(u8 rd, u32 imm31_12) +{ + return rv_u_insn(imm31_12, rd, 0x17); +} + +static inline u32 rv_add(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 0, rd, 0x33); +} + +static inline u32 rv_sub(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x33); +} + +static inline u32 rv_sltu(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 3, rd, 0x33); +} + +static inline u32 rv_and(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 7, rd, 0x33); +} + +static inline u32 rv_or(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 6, rd, 0x33); +} + +static inline u32 rv_xor(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 4, rd, 0x33); +} + +static inline u32 rv_sll(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 1, rd, 0x33); +} + +static inline u32 rv_srl(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 5, rd, 0x33); +} + +static inline u32 rv_sra(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x33); +} + +static inline u32 rv_mul(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 0, rd, 0x33); +} + +static inline u32 rv_mulhu(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 3, rd, 0x33); +} + +static inline u32 rv_divu(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 5, rd, 0x33); +} + +static inline u32 rv_remu(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 7, rd, 0x33); +} + +static inline u32 rv_jal(u8 rd, u32 imm20_1) +{ + return rv_j_insn(imm20_1, rd, 0x6f); +} + +static inline u32 rv_jalr(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 0, rd, 0x67); +} + +static inline u32 rv_beq(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 0, 0x63); +} + +static inline u32 rv_bne(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 1, 0x63); +} + +static inline u32 rv_bltu(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 6, 0x63); +} + +static inline u32 rv_bgtu(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_bltu(rs2, rs1, imm12_1); +} + +static inline u32 rv_bgeu(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 7, 0x63); +} + +static inline u32 rv_bleu(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_bgeu(rs2, rs1, imm12_1); +} + +static inline u32 rv_blt(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 4, 0x63); +} + +static inline u32 rv_bgt(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_blt(rs2, rs1, imm12_1); +} + +static inline u32 rv_bge(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 5, 0x63); +} + +static inline u32 rv_ble(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_bge(rs2, rs1, imm12_1); +} + +static inline u32 rv_lw(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 2, rd, 0x03); +} + +static inline u32 rv_lbu(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 4, rd, 0x03); +} + +static inline u32 rv_lhu(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 5, rd, 0x03); +} + +static inline u32 rv_sb(u8 rs1, u16 imm11_0, u8 rs2) +{ + return rv_s_insn(imm11_0, rs2, rs1, 0, 0x23); +} + +static inline u32 rv_sh(u8 rs1, u16 imm11_0, u8 rs2) +{ + return rv_s_insn(imm11_0, rs2, rs1, 1, 0x23); +} + +static inline u32 rv_sw(u8 rs1, u16 imm11_0, u8 rs2) +{ + return rv_s_insn(imm11_0, rs2, rs1, 2, 0x23); +} + +static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f); +} + +/* + * RV64-only instructions. + * + * These instructions are not available on RV32. Wrap them below a #if to + * ensure that the RV32 JIT doesn't emit any of these instructions. + */ + +#if __riscv_xlen == 64 + +static inline u32 rv_addiw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 0, rd, 0x1b); +} + +static inline u32 rv_slliw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 1, rd, 0x1b); +} + +static inline u32 rv_srliw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 5, rd, 0x1b); +} + +static inline u32 rv_sraiw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x1b); +} + +static inline u32 rv_addw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 0, rd, 0x3b); +} + +static inline u32 rv_subw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x3b); +} + +static inline u32 rv_sllw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 1, rd, 0x3b); +} + +static inline u32 rv_srlw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 5, rd, 0x3b); +} + +static inline u32 rv_sraw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x3b); +} + +static inline u32 rv_mulw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b); +} + +static inline u32 rv_divuw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b); +} + +static inline u32 rv_remuw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b); +} + +static inline u32 rv_ld(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 3, rd, 0x03); +} + +static inline u32 rv_lwu(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 6, rd, 0x03); +} + +static inline u32 rv_sd(u8 rs1, u16 imm11_0, u8 rs2) +{ + return rv_s_insn(imm11_0, rs2, rs1, 3, 0x23); +} + +static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); +} + +#endif /* __riscv_xlen == 64 */ + +void bpf_jit_build_prologue(struct rv_jit_context *ctx); +void bpf_jit_build_epilogue(struct rv_jit_context *ctx); + +int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, + bool extra_pass); + +#endif /* _BPF_JIT_H */ diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..302934177760 --- /dev/null +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -0,0 +1,1310 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * BPF JIT compiler for RV32G + * + * Copyright (c) 2020 Luke Nelson <luke.r.nels@gmail.com> + * Copyright (c) 2020 Xi Wang <xi.wang@gmail.com> + * + * The code is based on the BPF JIT compiler for RV64G by Björn Töpel and + * the BPF JIT compiler for 32-bit ARM by Shubham Bansal and Mircea Gherzan. + */ + +#include <linux/bpf.h> +#include <linux/filter.h> +#include "bpf_jit.h" + +enum { + /* Stack layout - these are offsets from (top of stack - 4). */ + BPF_R6_HI, + BPF_R6_LO, + BPF_R7_HI, + BPF_R7_LO, + BPF_R8_HI, + BPF_R8_LO, + BPF_R9_HI, + BPF_R9_LO, + BPF_AX_HI, + BPF_AX_LO, + /* Stack space for BPF_REG_6 through BPF_REG_9 and BPF_REG_AX. */ + BPF_JIT_SCRATCH_REGS, +}; + +#define STACK_OFFSET(k) (-4 - ((k) * 4)) + +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) + +#define RV_REG_TCC RV_REG_T6 +#define RV_REG_TCC_SAVED RV_REG_S7 + +static const s8 bpf2rv32[][2] = { + /* Return value from in-kernel function, and exit value from eBPF. */ + [BPF_REG_0] = {RV_REG_S2, RV_REG_S1}, + /* Arguments from eBPF program to in-kernel function. */ + [BPF_REG_1] = {RV_REG_A1, RV_REG_A0}, + [BPF_REG_2] = {RV_REG_A3, RV_REG_A2}, + [BPF_REG_3] = {RV_REG_A5, RV_REG_A4}, + [BPF_REG_4] = {RV_REG_A7, RV_REG_A6}, + [BPF_REG_5] = {RV_REG_S4, RV_REG_S3}, + /* + * Callee-saved registers that in-kernel function will preserve. + * Stored on the stack. + */ + [BPF_REG_6] = {STACK_OFFSET(BPF_R6_HI), STACK_OFFSET(BPF_R6_LO)}, + [BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)}, + [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)}, + [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, + /* Read-only frame pointer to access BPF stack. */ + [BPF_REG_FP] = {RV_REG_S6, RV_REG_S5}, + /* Temporary register for blinding constants. Stored on the stack. */ + [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)}, + /* + * Temporary registers used by the JIT to operate on registers stored + * on the stack. Save t0 and t1 to be used as temporaries in generated + * code. + */ + [TMP_REG_1] = {RV_REG_T3, RV_REG_T2}, + [TMP_REG_2] = {RV_REG_T5, RV_REG_T4}, +}; + +static s8 hi(const s8 *r) +{ + return r[0]; +} + +static s8 lo(const s8 *r) +{ + return r[1]; +} + +static void emit_imm(const s8 rd, s32 imm, struct rv_jit_context *ctx) +{ + u32 upper = (imm + (1 << 11)) >> 12; + u32 lower = imm & 0xfff; + + if (upper) { + emit(rv_lui(rd, upper), ctx); + emit(rv_addi(rd, rd, lower), ctx); + } else { + emit(rv_addi(rd, RV_REG_ZERO, lower), ctx); + } +} + +static void emit_imm32(const s8 *rd, s32 imm, struct rv_jit_context *ctx) +{ + /* Emit immediate into lower bits. */ + emit_imm(lo(rd), imm, ctx); + + /* Sign-extend into upper bits. */ + if (imm >= 0) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + else + emit(rv_addi(hi(rd), RV_REG_ZERO, -1), ctx); +} + +static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo, + struct rv_jit_context *ctx) +{ + emit_imm(lo(rd), imm_lo, ctx); + emit_imm(hi(rd), imm_hi, ctx); +} + +static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) +{ + int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 4; + const s8 *r0 = bpf2rv32[BPF_REG_0]; + + store_offset -= 4 * BPF_JIT_SCRATCH_REGS; + + /* Set return value if not tail call. */ + if (!is_tail_call) { + emit(rv_addi(RV_REG_A0, lo(r0), 0), ctx); + emit(rv_addi(RV_REG_A1, hi(r0), 0), ctx); + } + + /* Restore callee-saved registers. */ + emit(rv_lw(RV_REG_RA, store_offset - 0, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_FP, store_offset - 4, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S1, store_offset - 8, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S2, store_offset - 12, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S3, store_offset - 16, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S4, store_offset - 20, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S5, store_offset - 24, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S6, store_offset - 28, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S7, store_offset - 32, RV_REG_SP), ctx); + + emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); + + if (is_tail_call) { + /* + * goto *(t0 + 4); + * Skips first instruction of prologue which initializes tail + * call counter. Assumes t0 contains address of target program, + * see emit_bpf_tail_call. + */ + emit(rv_jalr(RV_REG_ZERO, RV_REG_T0, 4), ctx); + } else { + emit(rv_jalr(RV_REG_ZERO, RV_REG_RA, 0), ctx); + } +} + +static bool is_stacked(s8 reg) +{ + return reg < 0; +} + +static const s8 *bpf_get_reg64(const s8 *reg, const s8 *tmp, + struct rv_jit_context *ctx) +{ + if (is_stacked(hi(reg))) { + emit(rv_lw(hi(tmp), hi(reg), RV_REG_FP), ctx); + emit(rv_lw(lo(tmp), lo(reg), RV_REG_FP), ctx); + reg = tmp; + } + return reg; +} + +static void bpf_put_reg64(const s8 *reg, const s8 *src, + struct rv_jit_context *ctx) +{ + if (is_stacked(hi(reg))) { + emit(rv_sw(RV_REG_FP, hi(reg), hi(src)), ctx); + emit(rv_sw(RV_REG_FP, lo(reg), lo(src)), ctx); + } +} + +static const s8 *bpf_get_reg32(const s8 *reg, const s8 *tmp, + struct rv_jit_context *ctx) +{ + if (is_stacked(lo(reg))) { + emit(rv_lw(lo(tmp), lo(reg), RV_REG_FP), ctx); + reg = tmp; + } + return reg; +} + +static void bpf_put_reg32(const s8 *reg, const s8 *src, + struct rv_jit_context *ctx) +{ + if (is_stacked(lo(reg))) { + emit(rv_sw(RV_REG_FP, lo(reg), lo(src)), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(rv_sw(RV_REG_FP, hi(reg), RV_REG_ZERO), ctx); + } else if (!ctx->prog->aux->verifier_zext) { + emit(rv_addi(hi(reg), RV_REG_ZERO, 0), ctx); + } +} + +static void emit_jump_and_link(u8 rd, s32 rvoff, bool force_jalr, + struct rv_jit_context *ctx) +{ + s32 upper, lower; + + if (rvoff && is_21b_int(rvoff) && !force_jalr) { + emit(rv_jal(rd, rvoff >> 1), ctx); + return; + } + + upper = (rvoff + (1 << 11)) >> 12; + lower = rvoff & 0xfff; + emit(rv_auipc(RV_REG_T1, upper), ctx); + emit(rv_jalr(rd, RV_REG_T1, lower), ctx); +} + +static void emit_alu_i64(const s8 *dst, s32 imm, + struct rv_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + + switch (op) { + case BPF_MOV: + emit_imm32(rd, imm, ctx); + break; + case BPF_AND: + if (is_12b_int(imm)) { + emit(rv_andi(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_and(lo(rd), lo(rd), RV_REG_T0), ctx); + } + if (imm >= 0) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + break; + case BPF_OR: + if (is_12b_int(imm)) { + emit(rv_ori(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_or(lo(rd), lo(rd), RV_REG_T0), ctx); + } + if (imm < 0) + emit(rv_ori(hi(rd), RV_REG_ZERO, -1), ctx); + break; + case BPF_XOR: + if (is_12b_int(imm)) { + emit(rv_xori(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_xor(lo(rd), lo(rd), RV_REG_T0), ctx); + } + if (imm < 0) + emit(rv_xori(hi(rd), hi(rd), -1), ctx); + break; + case BPF_LSH: + if (imm >= 32) { + emit(rv_slli(hi(rd), lo(rd), imm - 32), ctx); + emit(rv_addi(lo(rd), RV_REG_ZERO, 0), ctx); + } else if (imm == 0) { + /* Do nothing. */ + } else { + emit(rv_srli(RV_REG_T0, lo(rd), 32 - imm), ctx); + emit(rv_slli(hi(rd), hi(rd), imm), ctx); + emit(rv_or(hi(rd), RV_REG_T0, hi(rd)), ctx); + emit(rv_slli(lo(rd), lo(rd), imm), ctx); + } + break; + case BPF_RSH: + if (imm >= 32) { + emit(rv_srli(lo(rd), hi(rd), imm - 32), ctx); + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + } else if (imm == 0) { + /* Do nothing. */ + } else { + emit(rv_slli(RV_REG_T0, hi(rd), 32 - imm), ctx); + emit(rv_srli(lo(rd), lo(rd), imm), ctx); + emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx); + emit(rv_srli(hi(rd), hi(rd), imm), ctx); + } + break; + case BPF_ARSH: + if (imm >= 32) { + emit(rv_srai(lo(rd), hi(rd), imm - 32), ctx); + emit(rv_srai(hi(rd), hi(rd), 31), ctx); + } else if (imm == 0) { + /* Do nothing. */ + } else { + emit(rv_slli(RV_REG_T0, hi(rd), 32 - imm), ctx); + emit(rv_srli(lo(rd), lo(rd), imm), ctx); + emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx); + emit(rv_srai(hi(rd), hi(rd), imm), ctx); + } + break; + } + + bpf_put_reg64(dst, rd, ctx); +} + +static void emit_alu_i32(const s8 *dst, s32 imm, + struct rv_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *rd = bpf_get_reg32(dst, tmp1, ctx); + + switch (op) { + case BPF_MOV: + emit_imm(lo(rd), imm, ctx); + break; + case BPF_ADD: + if (is_12b_int(imm)) { + emit(rv_addi(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_add(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_SUB: + if (is_12b_int(-imm)) { + emit(rv_addi(lo(rd), lo(rd), -imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_sub(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_AND: + if (is_12b_int(imm)) { + emit(rv_andi(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_and(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_OR: + if (is_12b_int(imm)) { + emit(rv_ori(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_or(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_XOR: + if (is_12b_int(imm)) { + emit(rv_xori(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_xor(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_LSH: + if (is_12b_int(imm)) { + emit(rv_slli(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_sll(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_RSH: + if (is_12b_int(imm)) { + emit(rv_srli(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_srl(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_ARSH: + if (is_12b_int(imm)) { + emit(rv_srai(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_sra(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + } + + bpf_put_reg32(dst, rd, ctx); +} + +static void emit_alu_r64(const s8 *dst, const s8 *src, + struct rv_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + const s8 *rs = bpf_get_reg64(src, tmp2, ctx); + + switch (op) { + case BPF_MOV: + emit(rv_addi(lo(rd), lo(rs), 0), ctx); + emit(rv_addi(hi(rd), hi(rs), 0), ctx); + break; + case BPF_ADD: + if (rd == rs) { + emit(rv_srli(RV_REG_T0, lo(rd), 31), ctx); + emit(rv_slli(hi(rd), hi(rd), 1), ctx); + emit(rv_or(hi(rd), RV_REG_T0, hi(rd)), ctx); + emit(rv_slli(lo(rd), lo(rd), 1), ctx); + } else { + emit(rv_add(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_sltu(RV_REG_T0, lo(rd), lo(rs)), ctx); + emit(rv_add(hi(rd), hi(rd), hi(rs)), ctx); + emit(rv_add(hi(rd), hi(rd), RV_REG_T0), ctx); + } + break; + case BPF_SUB: + emit(rv_sub(RV_REG_T1, hi(rd), hi(rs)), ctx); + emit(rv_sltu(RV_REG_T0, lo(rd), lo(rs)), ctx); + emit(rv_sub(hi(rd), RV_REG_T1, RV_REG_T0), ctx); + emit(rv_sub(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_AND: + emit(rv_and(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_and(hi(rd), hi(rd), hi(rs)), ctx); + break; + case BPF_OR: + emit(rv_or(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_or(hi(rd), hi(rd), hi(rs)), ctx); + break; + case BPF_XOR: + emit(rv_xor(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_xor(hi(rd), hi(rd), hi(rs)), ctx); + break; + case BPF_MUL: + emit(rv_mul(RV_REG_T0, hi(rs), lo(rd)), ctx); + emit(rv_mul(hi(rd), hi(rd), lo(rs)), ctx); + emit(rv_mulhu(RV_REG_T1, lo(rd), lo(rs)), ctx); + emit(rv_add(hi(rd), hi(rd), RV_REG_T0), ctx); + emit(rv_mul(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_add(hi(rd), hi(rd), RV_REG_T1), ctx); + break; + case BPF_LSH: + emit(rv_addi(RV_REG_T0, lo(rs), -32), ctx); + emit(rv_blt(RV_REG_T0, RV_REG_ZERO, 8), ctx); + emit(rv_sll(hi(rd), lo(rd), RV_REG_T0), ctx); + emit(rv_addi(lo(rd), RV_REG_ZERO, 0), ctx); + emit(rv_jal(RV_REG_ZERO, 16), ctx); + emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 31), ctx); + emit(rv_srli(RV_REG_T0, lo(rd), 1), ctx); + emit(rv_sub(RV_REG_T1, RV_REG_T1, lo(rs)), ctx); + emit(rv_srl(RV_REG_T0, RV_REG_T0, RV_REG_T1), ctx); + emit(rv_sll(hi(rd), hi(rd), lo(rs)), ctx); + emit(rv_or(hi(rd), RV_REG_T0, hi(rd)), ctx); + emit(rv_sll(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_RSH: + emit(rv_addi(RV_REG_T0, lo(rs), -32), ctx); + emit(rv_blt(RV_REG_T0, RV_REG_ZERO, 8), ctx); + emit(rv_srl(lo(rd), hi(rd), RV_REG_T0), ctx); + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + emit(rv_jal(RV_REG_ZERO, 16), ctx); + emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 31), ctx); + emit(rv_slli(RV_REG_T0, hi(rd), 1), ctx); + emit(rv_sub(RV_REG_T1, RV_REG_T1, lo(rs)), ctx); + emit(rv_sll(RV_REG_T0, RV_REG_T0, RV_REG_T1), ctx); + emit(rv_srl(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx); + emit(rv_srl(hi(rd), hi(rd), lo(rs)), ctx); + break; + case BPF_ARSH: + emit(rv_addi(RV_REG_T0, lo(rs), -32), ctx); + emit(rv_blt(RV_REG_T0, RV_REG_ZERO, 8), ctx); + emit(rv_sra(lo(rd), hi(rd), RV_REG_T0), ctx); + emit(rv_srai(hi(rd), hi(rd), 31), ctx); + emit(rv_jal(RV_REG_ZERO, 16), ctx); + emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 31), ctx); + emit(rv_slli(RV_REG_T0, hi(rd), 1), ctx); + emit(rv_sub(RV_REG_T1, RV_REG_T1, lo(rs)), ctx); + emit(rv_sll(RV_REG_T0, RV_REG_T0, RV_REG_T1), ctx); + emit(rv_srl(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx); + emit(rv_sra(hi(rd), hi(rd), lo(rs)), ctx); + break; + case BPF_NEG: + emit(rv_sub(lo(rd), RV_REG_ZERO, lo(rd)), ctx); + emit(rv_sltu(RV_REG_T0, RV_REG_ZERO, lo(rd)), ctx); + emit(rv_sub(hi(rd), RV_REG_ZERO, hi(rd)), ctx); + emit(rv_sub(hi(rd), hi(rd), RV_REG_T0), ctx); + break; + } + + bpf_put_reg64(dst, rd, ctx); +} + +static void emit_alu_r32(const s8 *dst, const s8 *src, + struct rv_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; + const s8 *rd = bpf_get_reg32(dst, tmp1, ctx); + const s8 *rs = bpf_get_reg32(src, tmp2, ctx); + + switch (op) { + case BPF_MOV: + emit(rv_addi(lo(rd), lo(rs), 0), ctx); + break; + case BPF_ADD: + emit(rv_add(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_SUB: + emit(rv_sub(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_AND: + emit(rv_and(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_OR: + emit(rv_or(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_XOR: + emit(rv_xor(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_MUL: + emit(rv_mul(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_DIV: + emit(rv_divu(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_MOD: + emit(rv_remu(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_LSH: + emit(rv_sll(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_RSH: + emit(rv_srl(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_ARSH: + emit(rv_sra(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_NEG: + emit(rv_sub(lo(rd), RV_REG_ZERO, lo(rd)), ctx); + break; + } + + bpf_put_reg32(dst, rd, ctx); +} + +static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 rvoff, + struct rv_jit_context *ctx, const u8 op) +{ + int e, s = ctx->ninsns; + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; + + const s8 *rs1 = bpf_get_reg64(src1, tmp1, ctx); + const s8 *rs2 = bpf_get_reg64(src2, tmp2, ctx); + + /* + * NO_JUMP skips over the rest of the instructions and the + * emit_jump_and_link, meaning the BPF branch is not taken. + * JUMP skips directly to the emit_jump_and_link, meaning + * the BPF branch is taken. + * + * The fallthrough case results in the BPF branch being taken. + */ +#define NO_JUMP(idx) (6 + (2 * (idx))) +#define JUMP(idx) (2 + (2 * (idx))) + + switch (op) { + case BPF_JEQ: + emit(rv_bne(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bne(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JGT: + emit(rv_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JLT: + emit(rv_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JGE: + emit(rv_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JLE: + emit(rv_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JNE: + emit(rv_bne(hi(rs1), hi(rs2), JUMP(1)), ctx); + emit(rv_beq(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSGT: + emit(rv_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSLT: + emit(rv_blt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSGE: + emit(rv_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSLE: + emit(rv_blt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSET: + emit(rv_and(RV_REG_T0, hi(rs1), hi(rs2)), ctx); + emit(rv_bne(RV_REG_T0, RV_REG_ZERO, JUMP(2)), ctx); + emit(rv_and(RV_REG_T0, lo(rs1), lo(rs2)), ctx); + emit(rv_beq(RV_REG_T0, RV_REG_ZERO, NO_JUMP(0)), ctx); + break; + } + +#undef NO_JUMP +#undef JUMP + + e = ctx->ninsns; + /* Adjust for extra insns. */ + rvoff -= (e - s) << 2; + emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); + return 0; +} + +static int emit_bcc(u8 op, u8 rd, u8 rs, int rvoff, struct rv_jit_context *ctx) +{ + int e, s = ctx->ninsns; + bool far = false; + int off; + + if (op == BPF_JSET) { + /* + * BPF_JSET is a special case: it has no inverse so we always + * treat it as a far branch. + */ + far = true; + } else if (!is_13b_int(rvoff)) { + op = invert_bpf_cond(op); + far = true; + } + + /* + * For a far branch, the condition is negated and we jump over the + * branch itself, and the two instructions from emit_jump_and_link. + * For a near branch, just use rvoff. + */ + off = far ? 6 : (rvoff >> 1); + + switch (op) { + case BPF_JEQ: + emit(rv_beq(rd, rs, off), ctx); + break; + case BPF_JGT: + emit(rv_bgtu(rd, rs, off), ctx); + break; + case BPF_JLT: + emit(rv_bltu(rd, rs, off), ctx); + break; + case BPF_JGE: + emit(rv_bgeu(rd, rs, off), ctx); + break; + case BPF_JLE: + emit(rv_bleu(rd, rs, off), ctx); + break; + case BPF_JNE: + emit(rv_bne(rd, rs, off), ctx); + break; + case BPF_JSGT: + emit(rv_bgt(rd, rs, off), ctx); + break; + case BPF_JSLT: + emit(rv_blt(rd, rs, off), ctx); + break; + case BPF_JSGE: + emit(rv_bge(rd, rs, off), ctx); + break; + case BPF_JSLE: + emit(rv_ble(rd, rs, off), ctx); + break; + case BPF_JSET: + emit(rv_and(RV_REG_T0, rd, rs), ctx); + emit(rv_beq(RV_REG_T0, RV_REG_ZERO, off), ctx); + break; + } + + if (far) { + e = ctx->ninsns; + /* Adjust for extra insns. */ + rvoff -= (e - s) << 2; + emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); + } + return 0; +} + +static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 rvoff, + struct rv_jit_context *ctx, const u8 op) +{ + int e, s = ctx->ninsns; + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; + + const s8 *rs1 = bpf_get_reg32(src1, tmp1, ctx); + const s8 *rs2 = bpf_get_reg32(src2, tmp2, ctx); + + e = ctx->ninsns; + /* Adjust for extra insns. */ + rvoff -= (e - s) << 2; + + if (emit_bcc(op, lo(rs1), lo(rs2), rvoff, ctx)) + return -1; + + return 0; +} + +static void emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) +{ + const s8 *r0 = bpf2rv32[BPF_REG_0]; + const s8 *r5 = bpf2rv32[BPF_REG_5]; + u32 upper = ((u32)addr + (1 << 11)) >> 12; + u32 lower = addr & 0xfff; + + /* R1-R4 already in correct registers---need to push R5 to stack. */ + emit(rv_addi(RV_REG_SP, RV_REG_SP, -16), ctx); + emit(rv_sw(RV_REG_SP, 0, lo(r5)), ctx); + emit(rv_sw(RV_REG_SP, 4, hi(r5)), ctx); + + /* Backup TCC. */ + emit(rv_addi(RV_REG_TCC_SAVED, RV_REG_TCC, 0), ctx); + + /* + * Use lui/jalr pair to jump to absolute address. Don't use emit_imm as + * the number of emitted instructions should not depend on the value of + * addr. + */ + emit(rv_lui(RV_REG_T1, upper), ctx); + emit(rv_jalr(RV_REG_RA, RV_REG_T1, lower), ctx); + + /* Restore TCC. */ + emit(rv_addi(RV_REG_TCC, RV_REG_TCC_SAVED, 0), ctx); + + /* Set return value and restore stack. */ + emit(rv_addi(lo(r0), RV_REG_A0, 0), ctx); + emit(rv_addi(hi(r0), RV_REG_A1, 0), ctx); + emit(rv_addi(RV_REG_SP, RV_REG_SP, 16), ctx); +} + +static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) +{ + /* + * R1 -> &ctx + * R2 -> &array + * R3 -> index + */ + int tc_ninsn, off, start_insn = ctx->ninsns; + const s8 *arr_reg = bpf2rv32[BPF_REG_2]; + const s8 *idx_reg = bpf2rv32[BPF_REG_3]; + + tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] : + ctx->offset[0]; + + /* max_entries = array->map.max_entries; */ + off = offsetof(struct bpf_array, map.max_entries); + if (is_12b_check(off, insn)) + return -1; + emit(rv_lw(RV_REG_T1, off, lo(arr_reg)), ctx); + + /* + * if (index >= max_entries) + * goto out; + */ + off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx); + + /* + * if ((temp_tcc = tcc - 1) < 0) + * goto out; + */ + emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx); + off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + emit_bcc(BPF_JSLT, RV_REG_T1, RV_REG_ZERO, off, ctx); + + /* + * prog = array->ptrs[index]; + * if (!prog) + * goto out; + */ + emit(rv_slli(RV_REG_T0, lo(idx_reg), 2), ctx); + emit(rv_add(RV_REG_T0, RV_REG_T0, lo(arr_reg)), ctx); + off = offsetof(struct bpf_array, ptrs); + if (is_12b_check(off, insn)) + return -1; + emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx); + off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + emit_bcc(BPF_JEQ, RV_REG_T0, RV_REG_ZERO, off, ctx); + + /* + * tcc = temp_tcc; + * goto *(prog->bpf_func + 4); + */ + off = offsetof(struct bpf_prog, bpf_func); + if (is_12b_check(off, insn)) + return -1; + emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx); + emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx); + /* Epilogue jumps to *(t0 + 4). */ + __build_epilogue(true, ctx); + return 0; +} + +static int emit_load_r64(const s8 *dst, const s8 *src, s16 off, + struct rv_jit_context *ctx, const u8 size) +{ + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + const s8 *rs = bpf_get_reg64(src, tmp2, ctx); + + emit_imm(RV_REG_T0, off, ctx); + emit(rv_add(RV_REG_T0, RV_REG_T0, lo(rs)), ctx); + + switch (size) { + case BPF_B: + emit(rv_lbu(lo(rd), 0, RV_REG_T0), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + break; + case BPF_H: + emit(rv_lhu(lo(rd), 0, RV_REG_T0), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + break; + case BPF_W: + emit(rv_lw(lo(rd), 0, RV_REG_T0), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + break; + case BPF_DW: + emit(rv_lw(lo(rd), 0, RV_REG_T0), ctx); + emit(rv_lw(hi(rd), 4, RV_REG_T0), ctx); + break; + } + + bpf_put_reg64(dst, rd, ctx); + return 0; +} + +static int emit_store_r64(const s8 *dst, const s8 *src, s16 off, + struct rv_jit_context *ctx, const u8 size, + const u8 mode) +{ + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + const s8 *rs = bpf_get_reg64(src, tmp2, ctx); + + if (mode == BPF_XADD && size != BPF_W) + return -1; + + emit_imm(RV_REG_T0, off, ctx); + emit(rv_add(RV_REG_T0, RV_REG_T0, lo(rd)), ctx); + + switch (size) { + case BPF_B: + emit(rv_sb(RV_REG_T0, 0, lo(rs)), ctx); + break; + case BPF_H: + emit(rv_sh(RV_REG_T0, 0, lo(rs)), ctx); + break; + case BPF_W: + switch (mode) { + case BPF_MEM: + emit(rv_sw(RV_REG_T0, 0, lo(rs)), ctx); + break; + case BPF_XADD: + emit(rv_amoadd_w(RV_REG_ZERO, lo(rs), RV_REG_T0, 0, 0), + ctx); + break; + } + break; + case BPF_DW: + emit(rv_sw(RV_REG_T0, 0, lo(rs)), ctx); + emit(rv_sw(RV_REG_T0, 4, hi(rs)), ctx); + break; + } + + return 0; +} + +static void emit_rev16(const s8 rd, struct rv_jit_context *ctx) +{ + emit(rv_slli(rd, rd, 16), ctx); + emit(rv_slli(RV_REG_T1, rd, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + emit(rv_add(RV_REG_T1, rd, RV_REG_T1), ctx); + emit(rv_srli(rd, RV_REG_T1, 16), ctx); +} + +static void emit_rev32(const s8 rd, struct rv_jit_context *ctx) +{ + emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 0), ctx); + emit(rv_andi(RV_REG_T0, rd, 255), ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx); + emit(rv_slli(RV_REG_T1, RV_REG_T1, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + emit(rv_andi(RV_REG_T0, rd, 255), ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx); + emit(rv_slli(RV_REG_T1, RV_REG_T1, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + emit(rv_andi(RV_REG_T0, rd, 255), ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx); + emit(rv_slli(RV_REG_T1, RV_REG_T1, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + emit(rv_andi(RV_REG_T0, rd, 255), ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx); + emit(rv_addi(rd, RV_REG_T1, 0), ctx); +} + +static void emit_zext64(const s8 *dst, struct rv_jit_context *ctx) +{ + const s8 *rd; + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + + rd = bpf_get_reg64(dst, tmp1, ctx); + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + bpf_put_reg64(dst, rd, ctx); +} + +int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, + bool extra_pass) +{ + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || + BPF_CLASS(insn->code) == BPF_JMP; + int s, e, rvoff, i = insn - ctx->prog->insnsi; + u8 code = insn->code; + s16 off = insn->off; + s32 imm = insn->imm; + + const s8 *dst = bpf2rv32[insn->dst_reg]; + const s8 *src = bpf2rv32[insn->src_reg]; + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; + + switch (code) { + case BPF_ALU64 | BPF_MOV | BPF_X: + + case BPF_ALU64 | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_K: + + case BPF_ALU64 | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_K: + + case BPF_ALU64 | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + + case BPF_ALU64 | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_K: + + case BPF_ALU64 | BPF_LSH | BPF_X: + case BPF_ALU64 | BPF_RSH | BPF_X: + case BPF_ALU64 | BPF_ARSH | BPF_X: + if (BPF_SRC(code) == BPF_K) { + emit_imm32(tmp2, imm, ctx); + src = tmp2; + } + emit_alu_r64(dst, src, ctx, BPF_OP(code)); + break; + + case BPF_ALU64 | BPF_NEG: + emit_alu_r64(dst, tmp2, ctx, BPF_OP(code)); + break; + + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_K: + goto notsupported; + + case BPF_ALU64 | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + emit_alu_i64(dst, imm, ctx, BPF_OP(code)); + break; + + case BPF_ALU | BPF_MOV | BPF_X: + if (imm == 1) { + /* Special mov32 for zext. */ + emit_zext64(dst, ctx); + break; + } + /* Fallthrough. */ + + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_X: + + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_MUL | BPF_K: + + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_DIV | BPF_K: + + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU | BPF_MOD | BPF_K: + + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_ARSH | BPF_X: + if (BPF_SRC(code) == BPF_K) { + emit_imm32(tmp2, imm, ctx); + src = tmp2; + } + emit_alu_r32(dst, src, ctx, BPF_OP(code)); + break; + + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: + /* + * mul,div,mod are handled in the BPF_X case since there are + * no RISC-V I-type equivalents. + */ + emit_alu_i32(dst, imm, ctx, BPF_OP(code)); + break; + + case BPF_ALU | BPF_NEG: + /* + * src is ignored---choose tmp2 as a dummy register since it + * is not on the stack. + */ + emit_alu_r32(dst, tmp2, ctx, BPF_OP(code)); + break; + + case BPF_ALU | BPF_END | BPF_FROM_LE: + { + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + + switch (imm) { + case 16: + emit(rv_slli(lo(rd), lo(rd), 16), ctx); + emit(rv_srli(lo(rd), lo(rd), 16), ctx); + /* Fallthrough. */ + case 32: + if (!ctx->prog->aux->verifier_zext) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + break; + case 64: + /* Do nothing. */ + break; + default: + pr_err("bpf-jit: BPF_END imm %d invalid\n", imm); + return -1; + } + + bpf_put_reg64(dst, rd, ctx); + break; + } + + case BPF_ALU | BPF_END | BPF_FROM_BE: + { + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + + switch (imm) { + case 16: + emit_rev16(lo(rd), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + break; + case 32: + emit_rev32(lo(rd), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + break; + case 64: + /* Swap upper and lower halves. */ + emit(rv_addi(RV_REG_T0, lo(rd), 0), ctx); + emit(rv_addi(lo(rd), hi(rd), 0), ctx); + emit(rv_addi(hi(rd), RV_REG_T0, 0), ctx); + + /* Swap each half. */ + emit_rev32(lo(rd), ctx); + emit_rev32(hi(rd), ctx); + break; + default: + pr_err("bpf-jit: BPF_END imm %d invalid\n", imm); + return -1; + } + + bpf_put_reg64(dst, rd, ctx); + break; + } + + case BPF_JMP | BPF_JA: + rvoff = rv_offset(i, off, ctx); + emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + break; + + case BPF_JMP | BPF_CALL: + { + bool fixed; + int ret; + u64 addr; + + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr, + &fixed); + if (ret < 0) + return ret; + emit_call(fixed, addr, ctx); + break; + } + + case BPF_JMP | BPF_TAIL_CALL: + if (emit_bpf_tail_call(i, ctx)) + return -1; + break; + + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_K: + + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_K: + + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_K: + + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_K: + + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_K: + + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_K: + + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_K: + + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_K: + + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_K: + + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_K: + + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_K: + rvoff = rv_offset(i, off, ctx); + if (BPF_SRC(code) == BPF_K) { + s = ctx->ninsns; + emit_imm32(tmp2, imm, ctx); + src = tmp2; + e = ctx->ninsns; + rvoff -= (e - s) << 2; + } + + if (is64) + emit_branch_r64(dst, src, rvoff, ctx, BPF_OP(code)); + else + emit_branch_r32(dst, src, rvoff, ctx, BPF_OP(code)); + break; + + case BPF_JMP | BPF_EXIT: + if (i == ctx->prog->len - 1) + break; + + rvoff = epilogue_offset(ctx); + emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + break; + + case BPF_LD | BPF_IMM | BPF_DW: + { + struct bpf_insn insn1 = insn[1]; + s32 imm_lo = imm; + s32 imm_hi = insn1.imm; + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + + emit_imm64(rd, imm_hi, imm_lo, ctx); + bpf_put_reg64(dst, rd, ctx); + return 1; + } + + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: + if (emit_load_r64(dst, src, off, ctx, BPF_SIZE(code))) + return -1; + break; + + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_DW: + + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_DW: + case BPF_STX | BPF_XADD | BPF_W: + if (BPF_CLASS(code) == BPF_ST) { + emit_imm32(tmp2, imm, ctx); + src = tmp2; + } + + if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code), + BPF_MODE(code))) + return -1; + break; + + /* No hardware support for 8-byte atomics in RV32. */ + case BPF_STX | BPF_XADD | BPF_DW: + /* Fallthrough. */ + +notsupported: + pr_info_once("bpf-jit: not supported: opcode %02x ***\n", code); + return -EFAULT; + + default: + pr_err("bpf-jit: unknown opcode %02x\n", code); + return -EINVAL; + } + + return 0; +} + +void bpf_jit_build_prologue(struct rv_jit_context *ctx) +{ + /* Make space to save 9 registers: ra, fp, s1--s7. */ + int stack_adjust = 9 * sizeof(u32), store_offset, bpf_stack_adjust; + const s8 *fp = bpf2rv32[BPF_REG_FP]; + const s8 *r1 = bpf2rv32[BPF_REG_1]; + + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + stack_adjust += bpf_stack_adjust; + + store_offset = stack_adjust - 4; + + stack_adjust += 4 * BPF_JIT_SCRATCH_REGS; + + /* + * The first instruction sets the tail-call-counter (TCC) register. + * This instruction is skipped by tail calls. + */ + emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx); + + emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx); + + /* Save callee-save registers. */ + emit(rv_sw(RV_REG_SP, store_offset - 0, RV_REG_RA), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 4, RV_REG_FP), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 8, RV_REG_S1), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 12, RV_REG_S2), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 16, RV_REG_S3), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 20, RV_REG_S4), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 24, RV_REG_S5), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 28, RV_REG_S6), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 32, RV_REG_S7), ctx); + + /* Set fp: used as the base address for stacked BPF registers. */ + emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx); + + /* Set up BPF stack pointer. */ + emit(rv_addi(lo(fp), RV_REG_SP, bpf_stack_adjust), ctx); + emit(rv_addi(hi(fp), RV_REG_ZERO, 0), ctx); + + /* Set up context pointer. */ + emit(rv_addi(lo(r1), RV_REG_A0, 0), ctx); + emit(rv_addi(hi(r1), RV_REG_ZERO, 0), ctx); + + ctx->stack_size = stack_adjust; +} + +void bpf_jit_build_epilogue(struct rv_jit_context *ctx) +{ + __build_epilogue(false, ctx); +} diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp64.c index 483f4ad7f4dc..cc1985d8750a 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -7,42 +7,7 @@ #include <linux/bpf.h> #include <linux/filter.h> -#include <asm/cacheflush.h> - -enum { - RV_REG_ZERO = 0, /* The constant value 0 */ - RV_REG_RA = 1, /* Return address */ - RV_REG_SP = 2, /* Stack pointer */ - RV_REG_GP = 3, /* Global pointer */ - RV_REG_TP = 4, /* Thread pointer */ - RV_REG_T0 = 5, /* Temporaries */ - RV_REG_T1 = 6, - RV_REG_T2 = 7, - RV_REG_FP = 8, - RV_REG_S1 = 9, /* Saved registers */ - RV_REG_A0 = 10, /* Function argument/return values */ - RV_REG_A1 = 11, /* Function arguments */ - RV_REG_A2 = 12, - RV_REG_A3 = 13, - RV_REG_A4 = 14, - RV_REG_A5 = 15, - RV_REG_A6 = 16, - RV_REG_A7 = 17, - RV_REG_S2 = 18, /* Saved registers */ - RV_REG_S3 = 19, - RV_REG_S4 = 20, - RV_REG_S5 = 21, - RV_REG_S6 = 22, - RV_REG_S7 = 23, - RV_REG_S8 = 24, - RV_REG_S9 = 25, - RV_REG_S10 = 26, - RV_REG_S11 = 27, - RV_REG_T3 = 28, /* Temporaries */ - RV_REG_T4 = 29, - RV_REG_T5 = 30, - RV_REG_T6 = 31, -}; +#include "bpf_jit.h" #define RV_REG_TCC RV_REG_A6 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */ @@ -73,22 +38,6 @@ enum { RV_CTX_F_SEEN_S6 = RV_REG_S6, }; -struct rv_jit_context { - struct bpf_prog *prog; - u32 *insns; /* RV insns */ - int ninsns; - int epilogue_offset; - int *offset; /* BPF to RV */ - unsigned long flags; - int stack_size; -}; - -struct rv_jit_data { - struct bpf_binary_header *header; - u8 *image; - struct rv_jit_context ctx; -}; - static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx) { u8 reg = regmap[bpf_reg]; @@ -156,346 +105,11 @@ static u8 rv_tail_call_reg(struct rv_jit_context *ctx) return RV_REG_A6; } -static void emit(const u32 insn, struct rv_jit_context *ctx) -{ - if (ctx->insns) - ctx->insns[ctx->ninsns] = insn; - - ctx->ninsns++; -} - -static u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd, u8 opcode) -{ - return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | - (rd << 7) | opcode; -} - -static u32 rv_i_insn(u16 imm11_0, u8 rs1, u8 funct3, u8 rd, u8 opcode) -{ - return (imm11_0 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | - opcode; -} - -static u32 rv_s_insn(u16 imm11_0, u8 rs2, u8 rs1, u8 funct3, u8 opcode) -{ - u8 imm11_5 = imm11_0 >> 5, imm4_0 = imm11_0 & 0x1f; - - return (imm11_5 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | - (imm4_0 << 7) | opcode; -} - -static u32 rv_sb_insn(u16 imm12_1, u8 rs2, u8 rs1, u8 funct3, u8 opcode) -{ - u8 imm12 = ((imm12_1 & 0x800) >> 5) | ((imm12_1 & 0x3f0) >> 4); - u8 imm4_1 = ((imm12_1 & 0xf) << 1) | ((imm12_1 & 0x400) >> 10); - - return (imm12 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | - (imm4_1 << 7) | opcode; -} - -static u32 rv_u_insn(u32 imm31_12, u8 rd, u8 opcode) -{ - return (imm31_12 << 12) | (rd << 7) | opcode; -} - -static u32 rv_uj_insn(u32 imm20_1, u8 rd, u8 opcode) -{ - u32 imm; - - imm = (imm20_1 & 0x80000) | ((imm20_1 & 0x3ff) << 9) | - ((imm20_1 & 0x400) >> 2) | ((imm20_1 & 0x7f800) >> 11); - - return (imm << 12) | (rd << 7) | opcode; -} - -static u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1, - u8 funct3, u8 rd, u8 opcode) -{ - u8 funct7 = (funct5 << 2) | (aq << 1) | rl; - - return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode); -} - -static u32 rv_addiw(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 0, rd, 0x1b); -} - -static u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 0, rd, 0x13); -} - -static u32 rv_addw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 0, rd, 0x3b); -} - -static u32 rv_add(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 0, rd, 0x33); -} - -static u32 rv_subw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x3b); -} - -static u32 rv_sub(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x33); -} - -static u32 rv_and(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 7, rd, 0x33); -} - -static u32 rv_or(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 6, rd, 0x33); -} - -static u32 rv_xor(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 4, rd, 0x33); -} - -static u32 rv_mulw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b); -} - -static u32 rv_mul(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 0, rd, 0x33); -} - -static u32 rv_divuw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b); -} - -static u32 rv_divu(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 5, rd, 0x33); -} - -static u32 rv_remuw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b); -} - -static u32 rv_remu(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 7, rd, 0x33); -} - -static u32 rv_sllw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 1, rd, 0x3b); -} - -static u32 rv_sll(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 1, rd, 0x33); -} - -static u32 rv_srlw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 5, rd, 0x3b); -} - -static u32 rv_srl(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 5, rd, 0x33); -} - -static u32 rv_sraw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x3b); -} - -static u32 rv_sra(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x33); -} - -static u32 rv_lui(u8 rd, u32 imm31_12) -{ - return rv_u_insn(imm31_12, rd, 0x37); -} - -static u32 rv_slli(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 1, rd, 0x13); -} - -static u32 rv_andi(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 7, rd, 0x13); -} - -static u32 rv_ori(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 6, rd, 0x13); -} - -static u32 rv_xori(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 4, rd, 0x13); -} - -static u32 rv_slliw(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 1, rd, 0x1b); -} - -static u32 rv_srliw(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 5, rd, 0x1b); -} - -static u32 rv_srli(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 5, rd, 0x13); -} - -static u32 rv_sraiw(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x1b); -} - -static u32 rv_srai(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x13); -} - -static u32 rv_jal(u8 rd, u32 imm20_1) -{ - return rv_uj_insn(imm20_1, rd, 0x6f); -} - -static u32 rv_jalr(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 0, rd, 0x67); -} - -static u32 rv_beq(u8 rs1, u8 rs2, u16 imm12_1) -{ - return rv_sb_insn(imm12_1, rs2, rs1, 0, 0x63); -} - -static u32 rv_bltu(u8 rs1, u8 rs2, u16 imm12_1) -{ - return rv_sb_insn(imm12_1, rs2, rs1, 6, 0x63); -} - -static u32 rv_bgeu(u8 rs1, u8 rs2, u16 imm12_1) -{ - return rv_sb_insn(imm12_1, rs2, rs1, 7, 0x63); -} - -static u32 rv_bne(u8 rs1, u8 rs2, u16 imm12_1) -{ - return rv_sb_insn(imm12_1, rs2, rs1, 1, 0x63); -} - -static u32 rv_blt(u8 rs1, u8 rs2, u16 imm12_1) -{ - return rv_sb_insn(imm12_1, rs2, rs1, 4, 0x63); -} - -static u32 rv_bge(u8 rs1, u8 rs2, u16 imm12_1) -{ - return rv_sb_insn(imm12_1, rs2, rs1, 5, 0x63); -} - -static u32 rv_sb(u8 rs1, u16 imm11_0, u8 rs2) -{ - return rv_s_insn(imm11_0, rs2, rs1, 0, 0x23); -} - -static u32 rv_sh(u8 rs1, u16 imm11_0, u8 rs2) -{ - return rv_s_insn(imm11_0, rs2, rs1, 1, 0x23); -} - -static u32 rv_sw(u8 rs1, u16 imm11_0, u8 rs2) -{ - return rv_s_insn(imm11_0, rs2, rs1, 2, 0x23); -} - -static u32 rv_sd(u8 rs1, u16 imm11_0, u8 rs2) -{ - return rv_s_insn(imm11_0, rs2, rs1, 3, 0x23); -} - -static u32 rv_lbu(u8 rd, u16 imm11_0, u8 rs1) -{ - return rv_i_insn(imm11_0, rs1, 4, rd, 0x03); -} - -static u32 rv_lhu(u8 rd, u16 imm11_0, u8 rs1) -{ - return rv_i_insn(imm11_0, rs1, 5, rd, 0x03); -} - -static u32 rv_lwu(u8 rd, u16 imm11_0, u8 rs1) -{ - return rv_i_insn(imm11_0, rs1, 6, rd, 0x03); -} - -static u32 rv_ld(u8 rd, u16 imm11_0, u8 rs1) -{ - return rv_i_insn(imm11_0, rs1, 3, rd, 0x03); -} - -static u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) -{ - return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f); -} - -static u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) -{ - return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); -} - -static u32 rv_auipc(u8 rd, u32 imm31_12) -{ - return rv_u_insn(imm31_12, rd, 0x17); -} - -static bool is_12b_int(s64 val) -{ - return -(1 << 11) <= val && val < (1 << 11); -} - -static bool is_13b_int(s64 val) -{ - return -(1 << 12) <= val && val < (1 << 12); -} - -static bool is_21b_int(s64 val) -{ - return -(1L << 20) <= val && val < (1L << 20); -} - static bool is_32b_int(s64 val) { return -(1L << 31) <= val && val < (1L << 31); } -static int is_12b_check(int off, int insn) -{ - if (!is_12b_int(off)) { - pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n", - insn, (int)off); - return -1; - } - return 0; -} - static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) { /* Note that the immediate from the add is sign-extended, @@ -535,23 +149,6 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) emit(rv_addi(rd, rd, lower), ctx); } -static int rv_offset(int insn, int off, struct rv_jit_context *ctx) -{ - int from, to; - - off++; /* BPF branch is from PC+1, RV is from PC */ - from = (insn > 0) ? ctx->offset[insn - 1] : 0; - to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; - return (to - from) << 2; -} - -static int epilogue_offset(struct rv_jit_context *ctx) -{ - int to = ctx->epilogue_offset, from = ctx->ninsns; - - return (to - from) << 2; -} - static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) { int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8; @@ -596,34 +193,6 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) ctx); } -/* return -1 or inverted cond */ -static int invert_bpf_cond(u8 cond) -{ - switch (cond) { - case BPF_JEQ: - return BPF_JNE; - case BPF_JGT: - return BPF_JLE; - case BPF_JLT: - return BPF_JGE; - case BPF_JGE: - return BPF_JLT; - case BPF_JLE: - return BPF_JGT; - case BPF_JNE: - return BPF_JEQ; - case BPF_JSGT: - return BPF_JSLE; - case BPF_JSLT: - return BPF_JSGE; - case BPF_JSGE: - return BPF_JSLT; - case BPF_JSLE: - return BPF_JSGT; - } - return -1; -} - static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff, struct rv_jit_context *ctx) { @@ -855,8 +424,8 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) return 0; } -static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, - bool extra_pass) +int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, + bool extra_pass) { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || BPF_CLASS(insn->code) == BPF_JMP; @@ -1434,7 +1003,7 @@ out_be: return 0; } -static void build_prologue(struct rv_jit_context *ctx) +void bpf_jit_build_prologue(struct rv_jit_context *ctx) { int stack_adjust = 0, store_offset, bpf_stack_adjust; @@ -1515,175 +1084,11 @@ static void build_prologue(struct rv_jit_context *ctx) ctx->stack_size = stack_adjust; } -static void build_epilogue(struct rv_jit_context *ctx) +void bpf_jit_build_epilogue(struct rv_jit_context *ctx) { __build_epilogue(false, ctx); } -static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset) -{ - const struct bpf_prog *prog = ctx->prog; - int i; - - for (i = 0; i < prog->len; i++) { - const struct bpf_insn *insn = &prog->insnsi[i]; - int ret; - - ret = emit_insn(insn, ctx, extra_pass); - if (ret > 0) { - i++; - if (offset) - offset[i] = ctx->ninsns; - continue; - } - if (offset) - offset[i] = ctx->ninsns; - if (ret) - return ret; - } - return 0; -} - -static void bpf_fill_ill_insns(void *area, unsigned int size) -{ - memset(area, 0, size); -} - -static void bpf_flush_icache(void *start, void *end) -{ - flush_icache_range((unsigned long)start, (unsigned long)end); -} - -bool bpf_jit_needs_zext(void) -{ - return true; -} - -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) -{ - bool tmp_blinded = false, extra_pass = false; - struct bpf_prog *tmp, *orig_prog = prog; - int pass = 0, prev_ninsns = 0, i; - struct rv_jit_data *jit_data; - unsigned int image_size = 0; - struct rv_jit_context *ctx; - - if (!prog->jit_requested) - return orig_prog; - - tmp = bpf_jit_blind_constants(prog); - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } - - jit_data = prog->aux->jit_data; - if (!jit_data) { - jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); - if (!jit_data) { - prog = orig_prog; - goto out; - } - prog->aux->jit_data = jit_data; - } - - ctx = &jit_data->ctx; - - if (ctx->offset) { - extra_pass = true; - image_size = sizeof(u32) * ctx->ninsns; - goto skip_init_ctx; - } - - ctx->prog = prog; - ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); - if (!ctx->offset) { - prog = orig_prog; - goto out_offset; - } - for (i = 0; i < prog->len; i++) { - prev_ninsns += 32; - ctx->offset[i] = prev_ninsns; - } - - for (i = 0; i < 16; i++) { - pass++; - ctx->ninsns = 0; - if (build_body(ctx, extra_pass, ctx->offset)) { - prog = orig_prog; - goto out_offset; - } - build_prologue(ctx); - ctx->epilogue_offset = ctx->ninsns; - build_epilogue(ctx); - - if (ctx->ninsns == prev_ninsns) { - if (jit_data->header) - break; - - image_size = sizeof(u32) * ctx->ninsns; - jit_data->header = - bpf_jit_binary_alloc(image_size, - &jit_data->image, - sizeof(u32), - bpf_fill_ill_insns); - if (!jit_data->header) { - prog = orig_prog; - goto out_offset; - } - - ctx->insns = (u32 *)jit_data->image; - /* Now, when the image is allocated, the image - * can potentially shrink more (auipc/jalr -> - * jal). - */ - } - prev_ninsns = ctx->ninsns; - } - - if (i == 16) { - pr_err("bpf-jit: image did not converge in <%d passes!\n", i); - bpf_jit_binary_free(jit_data->header); - prog = orig_prog; - goto out_offset; - } - -skip_init_ctx: - pass++; - ctx->ninsns = 0; - - build_prologue(ctx); - if (build_body(ctx, extra_pass, NULL)) { - bpf_jit_binary_free(jit_data->header); - prog = orig_prog; - goto out_offset; - } - build_epilogue(ctx); - - if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, image_size, pass, ctx->insns); - - prog->bpf_func = (void *)ctx->insns; - prog->jited = 1; - prog->jited_len = image_size; - - bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); - - if (!prog->is_func || extra_pass) { -out_offset: - kfree(ctx->offset); - kfree(jit_data); - prog->aux->jit_data = NULL; - } -out: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? - tmp : orig_prog); - return prog; -} - void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c new file mode 100644 index 000000000000..709b94ece3ed --- /dev/null +++ b/arch/riscv/net/bpf_jit_core.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common functionality for RV32 and RV64 BPF JIT compilers + * + * Copyright (c) 2019 Björn Töpel <bjorn.topel@gmail.com> + * + */ + +#include <linux/bpf.h> +#include <linux/filter.h> +#include "bpf_jit.h" + +/* Number of iterations to try until offsets converge. */ +#define NR_JIT_ITERATIONS 16 + +static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset) +{ + const struct bpf_prog *prog = ctx->prog; + int i; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + int ret; + + ret = bpf_jit_emit_insn(insn, ctx, extra_pass); + /* BPF_LD | BPF_IMM | BPF_DW: skip the next instruction. */ + if (ret > 0) + i++; + if (offset) + offset[i] = ctx->ninsns; + if (ret < 0) + return ret; + } + return 0; +} + +bool bpf_jit_needs_zext(void) +{ + return true; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + bool tmp_blinded = false, extra_pass = false; + struct bpf_prog *tmp, *orig_prog = prog; + int pass = 0, prev_ninsns = 0, i; + struct rv_jit_data *jit_data; + struct rv_jit_context *ctx; + unsigned int image_size = 0; + + if (!prog->jit_requested) + return orig_prog; + + tmp = bpf_jit_blind_constants(prog); + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + + ctx = &jit_data->ctx; + + if (ctx->offset) { + extra_pass = true; + image_size = sizeof(u32) * ctx->ninsns; + goto skip_init_ctx; + } + + ctx->prog = prog; + ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); + if (!ctx->offset) { + prog = orig_prog; + goto out_offset; + } + for (i = 0; i < prog->len; i++) { + prev_ninsns += 32; + ctx->offset[i] = prev_ninsns; + } + + for (i = 0; i < NR_JIT_ITERATIONS; i++) { + pass++; + ctx->ninsns = 0; + if (build_body(ctx, extra_pass, ctx->offset)) { + prog = orig_prog; + goto out_offset; + } + bpf_jit_build_prologue(ctx); + ctx->epilogue_offset = ctx->ninsns; + bpf_jit_build_epilogue(ctx); + + if (ctx->ninsns == prev_ninsns) { + if (jit_data->header) + break; + + image_size = sizeof(u32) * ctx->ninsns; + jit_data->header = + bpf_jit_binary_alloc(image_size, + &jit_data->image, + sizeof(u32), + bpf_fill_ill_insns); + if (!jit_data->header) { + prog = orig_prog; + goto out_offset; + } + + ctx->insns = (u32 *)jit_data->image; + /* + * Now, when the image is allocated, the image can + * potentially shrink more (auipc/jalr -> jal). + */ + } + prev_ninsns = ctx->ninsns; + } + + if (i == NR_JIT_ITERATIONS) { + pr_err("bpf-jit: image did not converge in <%d passes!\n", i); + bpf_jit_binary_free(jit_data->header); + prog = orig_prog; + goto out_offset; + } + +skip_init_ctx: + pass++; + ctx->ninsns = 0; + + bpf_jit_build_prologue(ctx); + if (build_body(ctx, extra_pass, NULL)) { + bpf_jit_binary_free(jit_data->header); + prog = orig_prog; + goto out_offset; + } + bpf_jit_build_epilogue(ctx); + + if (bpf_jit_enable > 1) + bpf_jit_dump(prog->len, image_size, pass, ctx->insns); + + prog->bpf_func = (void *)ctx->insns; + prog->jited = 1; + prog->jited_len = image_size; + + bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); + + if (!prog->is_func || extra_pass) { +out_offset: + kfree(ctx->offset); + kfree(jit_data); + prog->aux->jit_data = NULL; + } +out: + + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? + tmp : orig_prog); + return prog; +} diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 137a3920ca36..6d7c3b7e9281 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -752,6 +752,12 @@ static inline int pmd_write(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0; +} + static inline int pmd_dirty(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index bc61ea18e88d..60716d18ce5a 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -424,7 +424,7 @@ static void zpci_map_resources(struct pci_dev *pdev) if (zpci_use_mio(zdev)) pdev->resource[i].start = - (resource_size_t __force) zdev->bars[i].mio_wb; + (resource_size_t __force) zdev->bars[i].mio_wt; else pdev->resource[i].start = (resource_size_t __force) pci_iomap_range_fh(pdev, i, 0, 0); @@ -531,7 +531,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev, flags |= IORESOURCE_MEM_64; if (zpci_use_mio(zdev)) - addr = (unsigned long) zdev->bars[i].mio_wb; + addr = (unsigned long) zdev->bars[i].mio_wt; else addr = ZPCI_ADDR(entry); size = 1UL << zdev->bars[i].size; diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 94df0868804b..513a55562d75 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -194,9 +194,10 @@ avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1) sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1) sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1) +adx_instr := $(call as-instr,adox %r10$(comma)%r10,-DCONFIG_AS_ADX=1) -KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) -KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) +KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr) +KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr) KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index b69e00bf20b8..8c2e9eadee8a 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -11,6 +11,7 @@ avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no) sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no) sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no) +adx_supported := $(call as-instr,adox %r10$(comma)%r10,yes,no) obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o @@ -39,7 +40,11 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o -obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o + +# These modules require the assembler to support ADX. +ifeq ($(adx_supported),yes) + obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o +endif # These modules require assembler to support AVX. ifeq ($(avx_supported),yes) diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h index 02c6ef8f7667..07344d82e88e 100644 --- a/arch/x86/include/asm/io_bitmap.h +++ b/arch/x86/include/asm/io_bitmap.h @@ -19,7 +19,14 @@ struct task_struct; void io_bitmap_share(struct task_struct *tsk); void io_bitmap_exit(void); -void tss_update_io_bitmap(void); +void native_tss_update_io_bitmap(void); + +#ifdef CONFIG_PARAVIRT_XXL +#include <asm/paravirt.h> +#else +#define tss_update_io_bitmap native_tss_update_io_bitmap +#endif + #else static inline void io_bitmap_share(struct task_struct *tsk) { } static inline void io_bitmap_exit(void) { } diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 86e7317eb31f..694d8daf4983 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -295,6 +295,13 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } +#ifdef CONFIG_X86_IOPL_IOPERM +static inline void tss_update_io_bitmap(void) +{ + PVOP_VCALL0(cpu.update_io_bitmap); +} +#endif + static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 84812964d3dd..732f62e04ddb 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -140,6 +140,10 @@ struct pv_cpu_ops { void (*load_sp0)(unsigned long sp0); +#ifdef CONFIG_X86_IOPL_IOPERM + void (*update_io_bitmap)(void); +#endif + void (*wbinvd)(void); /* cpuid emulation, mostly so that caps bits can be disabled */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 52c9bfbbdb2a..4cdb123ff66a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -445,7 +445,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) * cpuid bit to be set. We need to ensure that we * update that bit in this CPU's "cpu_info". */ - get_cpu_cap(c); + set_cpu_cap(c, X86_FEATURE_OSPKE); } #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d817f255aed8..6efe0410fb72 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -425,7 +425,29 @@ static void __init sev_map_percpu_data(void) } } +static bool pv_tlb_flush_supported(void) +{ + return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); +} + +static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask); + #ifdef CONFIG_SMP + +static bool pv_ipi_supported(void) +{ + return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI); +} + +static bool pv_sched_yield_supported(void) +{ + return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); +} + #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) static void __send_ipi_mask(const struct cpumask *mask, int vector) @@ -490,12 +512,12 @@ static void kvm_send_ipi_mask(const struct cpumask *mask, int vector) static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) { unsigned int this_cpu = smp_processor_id(); - struct cpumask new_mask; + struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask); const struct cpumask *local_mask; - cpumask_copy(&new_mask, mask); - cpumask_clear_cpu(this_cpu, &new_mask); - local_mask = &new_mask; + cpumask_copy(new_mask, mask); + cpumask_clear_cpu(this_cpu, new_mask); + local_mask = new_mask; __send_ipi_mask(local_mask, vector); } @@ -575,7 +597,6 @@ static void __init kvm_apf_trap_init(void) update_intr_gate(X86_TRAP_PF, async_page_fault); } -static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask); static void kvm_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info) @@ -583,7 +604,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask, u8 state; int cpu; struct kvm_steal_time *src; - struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask); + struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask); cpumask_copy(flushmask, cpumask); /* @@ -619,11 +640,10 @@ static void __init kvm_guest_init(void) pv_ops.time.steal_clock = kvm_steal_clock; } - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + if (pv_tlb_flush_supported()) { pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; pv_ops.mmu.tlb_remove_table = tlb_remove_table; + pr_info("KVM setup pv remote TLB flush\n"); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -632,9 +652,7 @@ static void __init kvm_guest_init(void) #ifdef CONFIG_SMP smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; - if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && - !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + if (pv_sched_yield_supported()) { smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi; pr_info("KVM setup pv sched yield\n"); } @@ -700,7 +718,7 @@ static uint32_t __init kvm_detect(void) static void __init kvm_apic_init(void) { #if defined(CONFIG_SMP) - if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI)) + if (pv_ipi_supported()) kvm_setup_pv_ipi(); #endif } @@ -732,26 +750,31 @@ static __init int activate_jump_labels(void) } arch_initcall(activate_jump_labels); -static __init int kvm_setup_pv_tlb_flush(void) +static __init int kvm_alloc_cpumask(void) { int cpu; + bool alloc = false; if (!kvm_para_available() || nopv) return 0; - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + if (pv_tlb_flush_supported()) + alloc = true; + +#if defined(CONFIG_SMP) + if (pv_ipi_supported()) + alloc = true; +#endif + + if (alloc) for_each_possible_cpu(cpu) { - zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), + zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu), GFP_KERNEL, cpu_to_node(cpu)); } - pr_info("KVM setup pv remote TLB flush\n"); - } return 0; } -arch_initcall(kvm_setup_pv_tlb_flush); +arch_initcall(kvm_alloc_cpumask); #ifdef CONFIG_PARAVIRT_SPINLOCKS diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 789f5e4f89de..c131ba4e70ef 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -30,6 +30,7 @@ #include <asm/timer.h> #include <asm/special_insns.h> #include <asm/tlb.h> +#include <asm/io_bitmap.h> /* * nop stub, which must not clobber anything *including the stack* to @@ -341,6 +342,10 @@ struct paravirt_patch_template pv_ops = { .cpu.iret = native_iret, .cpu.swapgs = native_swapgs, +#ifdef CONFIG_X86_IOPL_IOPERM + .cpu.update_io_bitmap = native_tss_update_io_bitmap, +#endif + .cpu.start_context_switch = paravirt_nop, .cpu.end_context_switch = paravirt_nop, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 839b5244e3b7..3053c85e0e42 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -374,7 +374,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm) /** * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode */ -void tss_update_io_bitmap(void) +void native_tss_update_io_bitmap(void) { struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); struct thread_struct *t = ¤t->thread; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 991019d5eee1..1bb4927030af 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -59,6 +59,19 @@ config KVM If unsure, say N. +config KVM_WERROR + bool "Compile KVM with -Werror" + # KASAN may cause the build to fail due to larger frames + default y if X86_64 && !KASAN + # We use the dependency on !COMPILE_TEST to not be enabled + # blindly in allmodconfig or allyesconfig configurations + depends on (X86_64 && !KASAN) || !COMPILE_TEST + depends on EXPERT + help + Add -Werror to the build flags for (and only for) i915.ko. + + If in doubt, say "N". + config KVM_INTEL tristate "KVM for Intel (and compatible) processors support" depends on KVM && IA32_FEAT_CTL diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index b19ef421084d..e553f0fdd87d 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 ccflags-y += -Iarch/x86/kvm +ccflags-$(CONFIG_KVM_WERROR) += -Werror KVM := ../../../virt/kvm diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ad3f5b178a03..24c0b2ba8fb9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -57,11 +57,13 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +#ifdef MODULE static const struct x86_cpu_id svm_cpu_id[] = { X86_FEATURE_MATCH(X86_FEATURE_SVM), {} }; MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); +#endif #define IOPM_ALLOC_ORDER 2 #define MSRPM_ALLOC_ORDER 1 @@ -2194,8 +2196,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static int avic_init_vcpu(struct vcpu_svm *svm) { int ret; + struct kvm_vcpu *vcpu = &svm->vcpu; - if (!kvm_vcpu_apicv_active(&svm->vcpu)) + if (!avic || !irqchip_in_kernel(vcpu->kvm)) return 0; ret = avic_init_backing_page(&svm->vcpu); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 63aaf44edd1f..40b1e6138cd5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -64,11 +64,13 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +#ifdef MODULE static const struct x86_cpu_id vmx_cpu_id[] = { X86_FEATURE_MATCH(X86_FEATURE_VMX), {} }; MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); +#endif bool __read_mostly enable_vpid = 1; module_param_named(vpid, enable_vpid, bool, 0444); @@ -7175,6 +7177,7 @@ static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, else intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; } @@ -7204,6 +7207,20 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, case x86_intercept_outs: return vmx_check_intercept_io(vcpu, info); + case x86_intercept_lgdt: + case x86_intercept_lidt: + case x86_intercept_lldt: + case x86_intercept_ltr: + case x86_intercept_sgdt: + case x86_intercept_sidt: + case x86_intercept_sldt: + case x86_intercept_str: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC)) + return X86EMUL_CONTINUE; + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + break; + /* TODO: check more intercepts... */ default: break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 359fcd395132..5de200663f51 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7190,15 +7190,15 @@ static void kvm_timer_init(void) if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { #ifdef CONFIG_CPU_FREQ - struct cpufreq_policy policy; + struct cpufreq_policy *policy; int cpu; - memset(&policy, 0, sizeof(policy)); cpu = get_cpu(); - cpufreq_get_policy(&policy, cpu); - if (policy.cpuinfo.max_freq) - max_tsc_khz = policy.cpuinfo.max_freq; + policy = cpufreq_cpu_get(cpu); + if (policy && policy->cpuinfo.max_freq) + max_tsc_khz = policy->cpuinfo.max_freq; put_cpu(); + cpufreq_cpu_put(policy); #endif cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); @@ -7308,12 +7308,12 @@ int kvm_arch_init(void *opaque) } if (!ops->cpu_has_kvm_support()) { - printk(KERN_ERR "kvm: no hardware support\n"); + pr_err_ratelimited("kvm: no hardware support\n"); r = -EOPNOTSUPP; goto out; } if (ops->disabled_by_bios()) { - printk(KERN_ERR "kvm: disabled by bios\n"); + pr_err_ratelimited("kvm: disabled by bios\n"); r = -EOPNOTSUPP; goto out; } diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 64229dad7eab..69309cd56fdf 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -363,13 +363,8 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, { const struct ptdump_range ptdump_ranges[] = { #ifdef CONFIG_X86_64 - -#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1)) -#define normalize_addr(u) ((signed long)((u) << normalize_addr_shift) >> \ - normalize_addr_shift) - {0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2}, - {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL}, + {GUARD_HOLE_END_ADDR, ~0UL}, #else {0, ~0UL}, #endif diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 23df4885bbed..eb6ede2c3d43 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -238,7 +238,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) } } -static inline int is_kernel_text(unsigned long addr) +/* + * The <linux/kallsyms.h> already defines is_kernel_text, + * using '__' prefix not to get in conflict. + */ +static inline int __is_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) return 1; @@ -328,8 +332,8 @@ repeat: addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; - if (is_kernel_text(addr) || - is_kernel_text(addr2)) + if (__is_kernel_text(addr) || + __is_kernel_text(addr2)) prot = PAGE_KERNEL_LARGE_EXEC; pages_2m++; @@ -354,7 +358,7 @@ repeat: */ pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR); - if (is_kernel_text(addr)) + if (__is_kernel_text(addr)) prot = PAGE_KERNEL_EXEC; pages_4k++; @@ -881,7 +885,7 @@ static void mark_nxdata_nx(void) */ unsigned long start = PFN_ALIGN(_etext); /* - * This comes from is_kernel_text upper limit. Also HPAGE where used: + * This comes from __is_kernel_text upper limit. Also HPAGE where used: */ unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 9ba08e9abc09..5ea7c2cf7ab4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1361,37 +1361,140 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, -(stack_size - i * 8)); } +static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, + struct bpf_prog *p, int stack_size, bool mod_ret) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (emit_call(&prog, __bpf_prog_enter, prog)) + return -EINVAL; + /* remember prog start time returned by __bpf_prog_enter */ + emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); + + /* arg1: lea rdi, [rbp - stack_size] */ + EMIT4(0x48, 0x8D, 0x7D, -stack_size); + /* arg2: progs[i]->insnsi for interpreter */ + if (!p->jited) + emit_mov_imm64(&prog, BPF_REG_2, + (long) p->insnsi >> 32, + (u32) (long) p->insnsi); + /* call JITed bpf program or interpreter */ + if (emit_call(&prog, p->bpf_func, prog)) + return -EINVAL; + + /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return + * of the previous call which is then passed on the stack to + * the next BPF program. + */ + if (mod_ret) + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + + /* arg1: mov rdi, progs[i] */ + emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, + (u32) (long) p); + /* arg2: mov rsi, rbx <- start time in nsec */ + emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); + if (emit_call(&prog, __bpf_prog_exit, prog)) + return -EINVAL; + + *pprog = prog; + return 0; +} + +static void emit_nops(u8 **pprog, unsigned int len) +{ + unsigned int i, noplen; + u8 *prog = *pprog; + int cnt = 0; + + while (len > 0) { + noplen = len; + + if (noplen > ASM_NOP_MAX) + noplen = ASM_NOP_MAX; + + for (i = 0; i < noplen; i++) + EMIT1(ideal_nops[noplen][i]); + len -= noplen; + } + + *pprog = prog; +} + +static void emit_align(u8 **pprog, u32 align) +{ + u8 *target, *prog = *pprog; + + target = PTR_ALIGN(prog, align); + if (target != prog) + emit_nops(&prog, target - prog); + + *pprog = prog; +} + +static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) +{ + u8 *prog = *pprog; + int cnt = 0; + s64 offset; + + offset = func - (ip + 2 + 4); + if (!is_simm32(offset)) { + pr_err("Target %p is out of range\n", func); + return -EINVAL; + } + EMIT2_off32(0x0F, jmp_cond + 0x10, offset); + *pprog = prog; + return 0; +} + static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, - struct bpf_prog **progs, int prog_cnt, int stack_size) + struct bpf_tramp_progs *tp, int stack_size) { + int i; u8 *prog = *pprog; - int cnt = 0, i; - for (i = 0; i < prog_cnt; i++) { - if (emit_call(&prog, __bpf_prog_enter, prog)) - return -EINVAL; - /* remember prog start time returned by __bpf_prog_enter */ - emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); - - /* arg1: lea rdi, [rbp - stack_size] */ - EMIT4(0x48, 0x8D, 0x7D, -stack_size); - /* arg2: progs[i]->insnsi for interpreter */ - if (!progs[i]->jited) - emit_mov_imm64(&prog, BPF_REG_2, - (long) progs[i]->insnsi >> 32, - (u32) (long) progs[i]->insnsi); - /* call JITed bpf program or interpreter */ - if (emit_call(&prog, progs[i]->bpf_func, prog)) + for (i = 0; i < tp->nr_progs; i++) { + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false)) return -EINVAL; + } + *pprog = prog; + return 0; +} + +static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, + struct bpf_tramp_progs *tp, int stack_size, + u8 **branches) +{ + u8 *prog = *pprog; + int i, cnt = 0; - /* arg1: mov rdi, progs[i] */ - emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32, - (u32) (long) progs[i]); - /* arg2: mov rsi, rbx <- start time in nsec */ - emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); - if (emit_call(&prog, __bpf_prog_exit, prog)) + /* The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit_mov_imm32(&prog, false, BPF_REG_0, 0); + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + for (i = 0; i < tp->nr_progs; i++) { + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true)) return -EINVAL; + + /* mod_ret prog stored return value into [rbp - 8]. Emit: + * if (*(u64 *)(rbp - 8) != 0) + * goto do_fexit; + */ + /* cmp QWORD PTR [rbp - 0x8], 0x0 */ + EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00); + + /* Save the location of the branch and Generate 6 nops + * (4 bytes for an offset and 2 bytes for the jump) These nops + * are replaced with a conditional jump once do_fexit (i.e. the + * start of the fexit invocation) is finalized. + */ + branches[i] = prog; + emit_nops(&prog, 4 + 2); } + *pprog = prog; return 0; } @@ -1458,12 +1561,15 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, */ int arch_prepare_bpf_trampoline(void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_prog **fentry_progs, int fentry_cnt, - struct bpf_prog **fexit_progs, int fexit_cnt, + struct bpf_tramp_progs *tprogs, void *orig_call) { - int cnt = 0, nr_args = m->nr_args; + int ret, i, cnt = 0, nr_args = m->nr_args; int stack_size = nr_args * 8; + struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; + struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; + struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; + u8 **branches = NULL; u8 *prog; /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ @@ -1492,28 +1598,64 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, save_regs(m, &prog, nr_args, stack_size); - if (fentry_cnt) - if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size)) + if (fentry->nr_progs) + if (invoke_bpf(m, &prog, fentry, stack_size)) return -EINVAL; + if (fmod_ret->nr_progs) { + branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *), + GFP_KERNEL); + if (!branches) + return -ENOMEM; + + if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size, + branches)) { + ret = -EINVAL; + goto cleanup; + } + } + if (flags & BPF_TRAMP_F_CALL_ORIG) { - if (fentry_cnt) + if (fentry->nr_progs || fmod_ret->nr_progs) restore_regs(m, &prog, nr_args, stack_size); /* call original function */ - if (emit_call(&prog, orig_call, prog)) - return -EINVAL; + if (emit_call(&prog, orig_call, prog)) { + ret = -EINVAL; + goto cleanup; + } /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); } - if (fexit_cnt) - if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size)) - return -EINVAL; + if (fmod_ret->nr_progs) { + /* From Intel 64 and IA-32 Architectures Optimization + * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler + * Coding Rule 11: All branch targets should be 16-byte + * aligned. + */ + emit_align(&prog, 16); + /* Update the branches saved in invoke_bpf_mod_ret with the + * aligned address of do_fexit. + */ + for (i = 0; i < fmod_ret->nr_progs; i++) + emit_cond_near_jump(&branches[i], prog, branches[i], + X86_JNE); + } + + if (fexit->nr_progs) + if (invoke_bpf(m, &prog, fexit, stack_size)) { + ret = -EINVAL; + goto cleanup; + } if (flags & BPF_TRAMP_F_RESTORE_REGS) restore_regs(m, &prog, nr_args, stack_size); + /* This needs to be done regardless. If there were fmod_ret programs, + * the return value is only updated on the stack and still needs to be + * restored to R0. + */ if (flags & BPF_TRAMP_F_CALL_ORIG) /* restore original return value back into RAX */ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); @@ -1525,45 +1667,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ EMIT1(0xC3); /* ret */ /* Make sure the trampoline generation logic doesn't overflow */ - if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) - return -EFAULT; - return prog - (u8 *)image; -} - -static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) -{ - u8 *prog = *pprog; - int cnt = 0; - s64 offset; - - offset = func - (ip + 2 + 4); - if (!is_simm32(offset)) { - pr_err("Target %p is out of range\n", func); - return -EINVAL; - } - EMIT2_off32(0x0F, jmp_cond + 0x10, offset); - *pprog = prog; - return 0; -} - -static void emit_nops(u8 **pprog, unsigned int len) -{ - unsigned int i, noplen; - u8 *prog = *pprog; - int cnt = 0; - - while (len > 0) { - noplen = len; - - if (noplen > ASM_NOP_MAX) - noplen = ASM_NOP_MAX; - - for (i = 0; i < noplen; i++) - EMIT1(ideal_nops[noplen][i]); - len -= noplen; + if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { + ret = -EFAULT; + goto cleanup; } + ret = prog - (u8 *)image; - *pprog = prog; +cleanup: + kfree(branches); + return ret; } static int emit_fallback_jump(u8 **pprog) @@ -1588,7 +1700,7 @@ static int emit_fallback_jump(u8 **pprog) static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) { - u8 *jg_reloc, *jg_target, *prog = *pprog; + u8 *jg_reloc, *prog = *pprog; int pivot, err, jg_bytes = 1, cnt = 0; s64 jg_offset; @@ -1643,9 +1755,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) * Coding Rule 11: All branch targets should be 16-byte * aligned. */ - jg_target = PTR_ALIGN(prog, 16); - if (jg_target != prog) - emit_nops(&prog, jg_target - prog); + emit_align(&prog, 16); jg_offset = prog - jg_reloc; emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index fa8506e76bbe..d19a2edd63cb 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -180,7 +180,7 @@ void efi_sync_low_kernel_mappings(void) static inline phys_addr_t virt_to_phys_or_null_size(void *va, unsigned long size) { - bool bad_size; + phys_addr_t pa; if (!va) return 0; @@ -188,16 +188,13 @@ virt_to_phys_or_null_size(void *va, unsigned long size) if (virt_addr_valid(va)) return virt_to_phys(va); - /* - * A fully aligned variable on the stack is guaranteed not to - * cross a page bounary. Try to catch strings on the stack by - * checking that 'size' is a power of two. - */ - bad_size = size > PAGE_SIZE || !is_power_of_2(size); + pa = slow_virt_to_phys(va); - WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size); + /* check if the object crosses a page boundary */ + if (WARN_ON((pa ^ (pa + size - 1)) & PAGE_MASK)) + return 0; - return slow_virt_to_phys(va); + return pa; } #define virt_to_phys_or_null(addr) \ @@ -568,85 +565,25 @@ efi_thunk_set_virtual_address_map(unsigned long memory_map_size, static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) { - efi_status_t status; - u32 phys_tm, phys_tc; - unsigned long flags; - - spin_lock(&rtc_lock); - spin_lock_irqsave(&efi_runtime_lock, flags); - - phys_tm = virt_to_phys_or_null(tm); - phys_tc = virt_to_phys_or_null(tc); - - status = efi_thunk(get_time, phys_tm, phys_tc); - - spin_unlock_irqrestore(&efi_runtime_lock, flags); - spin_unlock(&rtc_lock); - - return status; + return EFI_UNSUPPORTED; } static efi_status_t efi_thunk_set_time(efi_time_t *tm) { - efi_status_t status; - u32 phys_tm; - unsigned long flags; - - spin_lock(&rtc_lock); - spin_lock_irqsave(&efi_runtime_lock, flags); - - phys_tm = virt_to_phys_or_null(tm); - - status = efi_thunk(set_time, phys_tm); - - spin_unlock_irqrestore(&efi_runtime_lock, flags); - spin_unlock(&rtc_lock); - - return status; + return EFI_UNSUPPORTED; } static efi_status_t efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) { - efi_status_t status; - u32 phys_enabled, phys_pending, phys_tm; - unsigned long flags; - - spin_lock(&rtc_lock); - spin_lock_irqsave(&efi_runtime_lock, flags); - - phys_enabled = virt_to_phys_or_null(enabled); - phys_pending = virt_to_phys_or_null(pending); - phys_tm = virt_to_phys_or_null(tm); - - status = efi_thunk(get_wakeup_time, phys_enabled, - phys_pending, phys_tm); - - spin_unlock_irqrestore(&efi_runtime_lock, flags); - spin_unlock(&rtc_lock); - - return status; + return EFI_UNSUPPORTED; } static efi_status_t efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) { - efi_status_t status; - u32 phys_tm; - unsigned long flags; - - spin_lock(&rtc_lock); - spin_lock_irqsave(&efi_runtime_lock, flags); - - phys_tm = virt_to_phys_or_null(tm); - - status = efi_thunk(set_wakeup_time, enabled, phys_tm); - - spin_unlock_irqrestore(&efi_runtime_lock, flags); - spin_unlock(&rtc_lock); - - return status; + return EFI_UNSUPPORTED; } static unsigned long efi_name_size(efi_char16_t *name) @@ -658,6 +595,8 @@ static efi_status_t efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, u32 *attr, unsigned long *data_size, void *data) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); efi_status_t status; u32 phys_name, phys_vendor, phys_attr; u32 phys_data_size, phys_data; @@ -665,14 +604,19 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, spin_lock_irqsave(&efi_runtime_lock, flags); + *vnd = *vendor; + phys_data_size = virt_to_phys_or_null(data_size); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); phys_attr = virt_to_phys_or_null(attr); phys_data = virt_to_phys_or_null_size(data, *data_size); - status = efi_thunk(get_variable, phys_name, phys_vendor, - phys_attr, phys_data_size, phys_data); + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_variable, phys_name, phys_vendor, + phys_attr, phys_data_size, phys_data); spin_unlock_irqrestore(&efi_runtime_lock, flags); @@ -683,19 +627,25 @@ static efi_status_t efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); u32 phys_name, phys_vendor, phys_data; efi_status_t status; unsigned long flags; spin_lock_irqsave(&efi_runtime_lock, flags); + *vnd = *vendor; + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - /* If data_size is > sizeof(u32) we've got problems */ - status = efi_thunk(set_variable, phys_name, phys_vendor, - attr, data_size, phys_data); + if (!phys_name || !phys_data) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); spin_unlock_irqrestore(&efi_runtime_lock, flags); @@ -707,6 +657,8 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); u32 phys_name, phys_vendor, phys_data; efi_status_t status; unsigned long flags; @@ -714,13 +666,17 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) return EFI_NOT_READY; + *vnd = *vendor; + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - /* If data_size is > sizeof(u32) we've got problems */ - status = efi_thunk(set_variable, phys_name, phys_vendor, - attr, data_size, phys_data); + if (!phys_name || !phys_data) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); spin_unlock_irqrestore(&efi_runtime_lock, flags); @@ -732,39 +688,36 @@ efi_thunk_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); efi_status_t status; u32 phys_name_size, phys_name, phys_vendor; unsigned long flags; spin_lock_irqsave(&efi_runtime_lock, flags); + *vnd = *vendor; + phys_name_size = virt_to_phys_or_null(name_size); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_name = virt_to_phys_or_null_size(name, *name_size); - status = efi_thunk(get_next_variable, phys_name_size, - phys_name, phys_vendor); + if (!phys_name) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_next_variable, phys_name_size, + phys_name, phys_vendor); spin_unlock_irqrestore(&efi_runtime_lock, flags); + *vendor = *vnd; return status; } static efi_status_t efi_thunk_get_next_high_mono_count(u32 *count) { - efi_status_t status; - u32 phys_count; - unsigned long flags; - - spin_lock_irqsave(&efi_runtime_lock, flags); - - phys_count = virt_to_phys_or_null(count); - status = efi_thunk(get_next_high_mono_count, phys_count); - - spin_unlock_irqrestore(&efi_runtime_lock, flags); - - return status; + return EFI_UNSUPPORTED; } static void diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 79409120a603..507f4fb88fa7 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -72,6 +72,9 @@ #include <asm/mwait.h> #include <asm/pci_x86.h> #include <asm/cpu.h> +#ifdef CONFIG_X86_IOPL_IOPERM +#include <asm/io_bitmap.h> +#endif #ifdef CONFIG_ACPI #include <linux/acpi.h> @@ -837,6 +840,25 @@ static void xen_load_sp0(unsigned long sp0) this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } +#ifdef CONFIG_X86_IOPL_IOPERM +static void xen_update_io_bitmap(void) +{ + struct physdev_set_iobitmap iobitmap; + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + + native_tss_update_io_bitmap(); + + iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) + + tss->x86_tss.io_bitmap_base; + if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID) + iobitmap.nr_ports = 0; + else + iobitmap.nr_ports = IO_BITMAP_BITS; + + HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap); +} +#endif + static void xen_io_delay(void) { } @@ -1047,6 +1069,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .write_idt_entry = xen_write_idt_entry, .load_sp0 = xen_load_sp0, +#ifdef CONFIG_X86_IOPL_IOPERM + .update_io_bitmap = xen_update_io_bitmap, +#endif .io_delay = xen_io_delay, /* Xen takes care of %gs when switching to usermode for us */ diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 09b69a3ed490..f0ff6654af28 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -610,12 +610,13 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, */ entity = &bfqg->entity; for_each_entity(entity) { - bfqg = container_of(entity, struct bfq_group, entity); - if (bfqg != bfqd->root_group) { - parent = bfqg_parent(bfqg); + struct bfq_group *curr_bfqg = container_of(entity, + struct bfq_group, entity); + if (curr_bfqg != bfqd->root_group) { + parent = bfqg_parent(curr_bfqg); if (!parent) parent = bfqd->root_group; - bfq_group_set_parent(bfqg, parent); + bfq_group_set_parent(curr_bfqg, parent); } } diff --git a/block/blk-core.c b/block/blk-core.c index 089e890ab208..60dc9552ef8d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1663,12 +1663,6 @@ int kblockd_schedule_work(struct work_struct *work) } EXPORT_SYMBOL(kblockd_schedule_work); -int kblockd_schedule_work_on(int cpu, struct work_struct *work) -{ - return queue_work_on(cpu, kblockd_workqueue, work); -} -EXPORT_SYMBOL(kblockd_schedule_work_on); - int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { diff --git a/block/blk-flush.c b/block/blk-flush.c index 3f977c517960..5cc775bdb06a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -412,7 +412,7 @@ void blk_insert_flush(struct request *rq) */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, false, false); return; } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index ca22afd47b3d..856356b1619e 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -361,13 +361,19 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, bool has_sched, struct request *rq) { - /* dispatch flush rq directly */ - if (rq->rq_flags & RQF_FLUSH_SEQ) { - spin_lock(&hctx->lock); - list_add(&rq->queuelist, &hctx->dispatch); - spin_unlock(&hctx->lock); + /* + * dispatch flush and passthrough rq directly + * + * passthrough request has to be added to hctx->dispatch directly. + * For some reason, device may be in one situation which can't + * handle FS request, so STS_RESOURCE is always returned and the + * FS request will be added to hctx->dispatch. However passthrough + * request may be required at that time for fixing the problem. If + * passthrough request is added to scheduler queue, there isn't any + * chance to dispatch it given we prioritize requests in hctx->dispatch. + */ + if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq)) return true; - } if (has_sched) rq->rq_flags |= RQF_SORTED; @@ -391,8 +397,10 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, WARN_ON(e && (rq->tag != -1)); - if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) + if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) { + blk_mq_request_bypass_insert(rq, at_head, false); goto run; + } if (e && e->type->ops.insert_requests) { LIST_HEAD(list); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index fbacde454718..586c9d6e904a 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -183,8 +183,8 @@ found_tag: return tag + tag_offset; } -void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, - struct blk_mq_ctx *ctx, unsigned int tag) +void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag) { if (!blk_mq_tag_is_reserved(tags, tag)) { const int real_tag = tag - tags->nr_reserved_tags; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 15bc74acb57e..2b8321efb682 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -26,8 +26,8 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r extern void blk_mq_free_tags(struct blk_mq_tags *tags); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); -extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, - struct blk_mq_ctx *ctx, unsigned int tag); +extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag); extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags **tags, unsigned int depth, bool can_grow); diff --git a/block/blk-mq.c b/block/blk-mq.c index a12b1763508d..d92088dec6c3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -477,9 +477,9 @@ static void __blk_mq_free_request(struct request *rq) blk_pm_mark_last_busy(rq); rq->mq_hctx = NULL; if (rq->tag != -1) - blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); + blk_mq_put_tag(hctx->tags, ctx, rq->tag); if (sched_tag != -1) - blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag); + blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); blk_mq_sched_restart(hctx); blk_queue_exit(q); } @@ -735,7 +735,7 @@ static void blk_mq_requeue_work(struct work_struct *work) * merge. */ if (rq->rq_flags & RQF_DONTPREP) - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, false, false); else blk_mq_sched_insert_request(rq, true, false, false); } @@ -1286,7 +1286,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, q->mq_ops->commit_rqs(hctx); spin_lock(&hctx->lock); - list_splice_init(list, &hctx->dispatch); + list_splice_tail_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); /* @@ -1677,12 +1677,16 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, * Should only be used carefully, when the caller knows we want to * bypass a potential IO scheduler on the target device. */ -void blk_mq_request_bypass_insert(struct request *rq, bool run_queue) +void blk_mq_request_bypass_insert(struct request *rq, bool at_head, + bool run_queue) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; spin_lock(&hctx->lock); - list_add_tail(&rq->queuelist, &hctx->dispatch); + if (at_head) + list_add(&rq->queuelist, &hctx->dispatch); + else + list_add_tail(&rq->queuelist, &hctx->dispatch); spin_unlock(&hctx->lock); if (run_queue) @@ -1849,7 +1853,7 @@ insert: if (bypass_insert) return BLK_STS_RESOURCE; - blk_mq_request_bypass_insert(rq, run_queue); + blk_mq_request_bypass_insert(rq, false, run_queue); return BLK_STS_OK; } @@ -1876,7 +1880,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) - blk_mq_request_bypass_insert(rq, true); + blk_mq_request_bypass_insert(rq, false, true); else if (ret != BLK_STS_OK) blk_mq_end_request(rq, ret); @@ -1910,7 +1914,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, if (ret != BLK_STS_OK) { if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { - blk_mq_request_bypass_insert(rq, + blk_mq_request_bypass_insert(rq, false, list_empty(list)); break; } @@ -3398,7 +3402,6 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb) } static unsigned long blk_mq_poll_nsecs(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, struct request *rq) { unsigned long ret = 0; @@ -3431,7 +3434,6 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q, } static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, struct request *rq) { struct hrtimer_sleeper hs; @@ -3451,7 +3453,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, if (q->poll_nsec > 0) nsecs = q->poll_nsec; else - nsecs = blk_mq_poll_nsecs(q, hctx, rq); + nsecs = blk_mq_poll_nsecs(q, rq); if (!nsecs) return false; @@ -3506,7 +3508,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, return false; } - return blk_mq_poll_hybrid_sleep(q, hctx, rq); + return blk_mq_poll_hybrid_sleep(q, rq); } /** diff --git a/block/blk-mq.h b/block/blk-mq.h index eaaca8fc1c28..10bfdfb494fa 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -66,7 +66,8 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, */ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head); -void blk_mq_request_bypass_insert(struct request *rq, bool run_queue); +void blk_mq_request_bypass_insert(struct request *rq, bool at_head, + bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); @@ -199,7 +200,7 @@ static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx) static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq) { - blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag); + blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag); rq->tag = -1; if (rq->rq_flags & RQF_MQ_INFLIGHT) { diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index b5516b04ffc0..6e9ec6e3fe47 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -55,12 +55,14 @@ static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) } #endif +static bool acpi_no_watchdog; + static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) { const struct acpi_table_wdat *wdat = NULL; acpi_status status; - if (acpi_disabled) + if (acpi_disabled || acpi_no_watchdog) return NULL; status = acpi_get_table(ACPI_SIG_WDAT, 0, @@ -88,6 +90,14 @@ bool acpi_has_watchdog(void) } EXPORT_SYMBOL_GPL(acpi_has_watchdog); +/* ACPI watchdog can be disabled on boot command line */ +static int __init disable_acpi_watchdog(char *str) +{ + acpi_no_watchdog = true; + return 1; +} +__setup("acpi_no_watchdog", disable_acpi_watchdog); + void __init acpi_watchdog_init(void) { const struct acpi_wdat_entry *entries; @@ -126,12 +136,11 @@ void __init acpi_watchdog_init(void) gas = &entries[i].register_region; res.start = gas->address; + res.end = res.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { res.flags = IORESOURCE_MEM; - res.end = res.start + ALIGN(gas->access_width, 4) - 1; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { res.flags = IORESOURCE_IO; - res.end = res.start + gas->access_width - 1; } else { pr_warn("Unsupported address space: %u\n", gas->space_id); diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a6b2082c24f8..e47c8a4c83db 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5228,6 +5228,7 @@ static int binder_open(struct inode *nodp, struct file *filp) binder_dev = container_of(filp->private_data, struct binder_device, miscdev); } + refcount_inc(&binder_dev->ref); proc->context = &binder_dev->context; binder_alloc_init(&proc->alloc); @@ -5405,6 +5406,7 @@ static int binder_node_release(struct binder_node *node, int refs) static void binder_deferred_release(struct binder_proc *proc) { struct binder_context *context = proc->context; + struct binder_device *device; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, active_transactions; @@ -5421,6 +5423,12 @@ static void binder_deferred_release(struct binder_proc *proc) context->binder_context_mgr_node = NULL; } mutex_unlock(&context->context_mgr_node_lock); + device = container_of(proc->context, struct binder_device, context); + if (refcount_dec_and_test(&device->ref)) { + kfree(context->name); + kfree(device); + } + proc->context = NULL; binder_inner_proc_lock(proc); /* * Make sure proc stays alive after we @@ -6077,6 +6085,7 @@ static int __init init_binder_device(const char *name) binder_device->miscdev.minor = MISC_DYNAMIC_MINOR; binder_device->miscdev.name = name; + refcount_set(&binder_device->ref, 1); binder_device->context.binder_context_mgr_uid = INVALID_UID; binder_device->context.name = name; mutex_init(&binder_device->context.context_mgr_node_lock); diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index ae991097d14d..283d3cb9c16e 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -8,6 +8,7 @@ #include <linux/list.h> #include <linux/miscdevice.h> #include <linux/mutex.h> +#include <linux/refcount.h> #include <linux/stddef.h> #include <linux/types.h> #include <linux/uidgid.h> @@ -33,6 +34,7 @@ struct binder_device { struct miscdevice miscdev; struct binder_context context; struct inode *binderfs_inode; + refcount_t ref; }; /** diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index e2580e5316a2..110e41f920c2 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -154,6 +154,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode, if (!name) goto err; + refcount_set(&device->ref, 1); device->binderfs_inode = inode; device->context.binder_context_mgr_uid = INVALID_UID; device->context.name = name; @@ -257,8 +258,10 @@ static void binderfs_evict_inode(struct inode *inode) ida_free(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); - kfree(device->context.name); - kfree(device); + if (refcount_dec_and_test(&device->ref)) { + kfree(device->context.name); + kfree(device); + } } /** diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 8db8c0fb5e2d..7af74fb450a0 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -91,7 +91,7 @@ #ifdef GENERAL_DEBUG #define PRINTK(args...) printk(args) #else -#define PRINTK(args...) +#define PRINTK(args...) do {} while (0) #endif /* GENERAL_DEBUG */ #ifdef EXTRA_DEBUG diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index b8313a04422d..48efa7a047f3 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -111,7 +111,7 @@ config CFAG12864B If unsure, say N. config CFAG12864B_RATE - int "Refresh rate (hertz)" + int "Refresh rate (hertz)" depends on CFAG12864B default "20" ---help--- @@ -329,7 +329,7 @@ config PANEL_LCD_PROTO config PANEL_LCD_PIN_E depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO="0" - int "Parallel port pin number & polarity connected to the LCD E signal (-17...17) " + int "Parallel port pin number & polarity connected to the LCD E signal (-17...17) " range -17 17 default 14 ---help--- @@ -344,7 +344,7 @@ config PANEL_LCD_PIN_E config PANEL_LCD_PIN_RS depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO="0" - int "Parallel port pin number & polarity connected to the LCD RS signal (-17...17) " + int "Parallel port pin number & polarity connected to the LCD RS signal (-17...17) " range -17 17 default 17 ---help--- @@ -359,7 +359,7 @@ config PANEL_LCD_PIN_RS config PANEL_LCD_PIN_RW depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO="0" - int "Parallel port pin number & polarity connected to the LCD RW signal (-17...17) " + int "Parallel port pin number & polarity connected to the LCD RW signal (-17...17) " range -17 17 default 16 ---help--- @@ -374,7 +374,7 @@ config PANEL_LCD_PIN_RW config PANEL_LCD_PIN_SCL depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO!="0" - int "Parallel port pin number & polarity connected to the LCD SCL signal (-17...17) " + int "Parallel port pin number & polarity connected to the LCD SCL signal (-17...17) " range -17 17 default 1 ---help--- @@ -389,7 +389,7 @@ config PANEL_LCD_PIN_SCL config PANEL_LCD_PIN_SDA depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO!="0" - int "Parallel port pin number & polarity connected to the LCD SDA signal (-17...17) " + int "Parallel port pin number & polarity connected to the LCD SDA signal (-17...17) " range -17 17 default 2 ---help--- @@ -404,12 +404,12 @@ config PANEL_LCD_PIN_SDA config PANEL_LCD_PIN_BL depends on PANEL_PROFILE="0" && PANEL_LCD="1" - int "Parallel port pin number & polarity connected to the LCD backlight signal (-17...17) " + int "Parallel port pin number & polarity connected to the LCD backlight signal (-17...17) " range -17 17 default 0 ---help--- This describes the number of the parallel port pin to which the LCD 'BL' signal - has been connected. It can be : + has been connected. It can be : 0 : no connection (eg: connected to ground) 1..17 : directly connected to any of these pins on the DB25 plug diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index 874c259a8829..c0da3820454b 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -88,7 +88,7 @@ struct charlcd_priv { int len; } esc_seq; - unsigned long long drvdata[0]; + unsigned long long drvdata[]; }; #define charlcd_to_priv(p) container_of(p, struct charlcd_priv, lcd) diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c index efb928e25aef..1cce409ce5ca 100644 --- a/drivers/auxdisplay/img-ascii-lcd.c +++ b/drivers/auxdisplay/img-ascii-lcd.c @@ -356,7 +356,6 @@ static int img_ascii_lcd_probe(struct platform_device *pdev) const struct of_device_id *match; const struct img_ascii_lcd_config *cfg; struct img_ascii_lcd_ctx *ctx; - struct resource *res; int err; match = of_match_device(img_ascii_lcd_matches, &pdev->dev); @@ -378,8 +377,7 @@ static int img_ascii_lcd_probe(struct platform_device *pdev) &ctx->offset)) return -EINVAL; } else { - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ctx->base = devm_ioremap_resource(&pdev->dev, res); + ctx->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ctx->base)) return PTR_ERR(ctx->base); } diff --git a/drivers/base/core.c b/drivers/base/core.c index fb8b7990f6fd..befc2722dbfc 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -718,6 +718,8 @@ static void __device_links_queue_sync_state(struct device *dev, { struct device_link *link; + if (!dev_has_sync_state(dev)) + return; if (dev->state_synced) return; @@ -745,25 +747,31 @@ static void __device_links_queue_sync_state(struct device *dev, /** * device_links_flush_sync_list - Call sync_state() on a list of devices * @list: List of devices to call sync_state() on + * @dont_lock_dev: Device for which lock is already held by the caller * * Calls sync_state() on all the devices that have been queued for it. This - * function is used in conjunction with __device_links_queue_sync_state(). + * function is used in conjunction with __device_links_queue_sync_state(). The + * @dont_lock_dev parameter is useful when this function is called from a + * context where a device lock is already held. */ -static void device_links_flush_sync_list(struct list_head *list) +static void device_links_flush_sync_list(struct list_head *list, + struct device *dont_lock_dev) { struct device *dev, *tmp; list_for_each_entry_safe(dev, tmp, list, links.defer_sync) { list_del_init(&dev->links.defer_sync); - device_lock(dev); + if (dev != dont_lock_dev) + device_lock(dev); if (dev->bus->sync_state) dev->bus->sync_state(dev); else if (dev->driver && dev->driver->sync_state) dev->driver->sync_state(dev); - device_unlock(dev); + if (dev != dont_lock_dev) + device_unlock(dev); put_device(dev); } @@ -801,7 +809,7 @@ void device_links_supplier_sync_state_resume(void) out: device_links_write_unlock(); - device_links_flush_sync_list(&sync_list); + device_links_flush_sync_list(&sync_list, NULL); } static int sync_state_resume_initcall(void) @@ -813,7 +821,7 @@ late_initcall(sync_state_resume_initcall); static void __device_links_supplier_defer_sync(struct device *sup) { - if (list_empty(&sup->links.defer_sync)) + if (list_empty(&sup->links.defer_sync) && dev_has_sync_state(sup)) list_add_tail(&sup->links.defer_sync, &deferred_sync); } @@ -865,6 +873,11 @@ void device_links_driver_bound(struct device *dev) driver_deferred_probe_add(link->consumer); } + if (defer_sync_state_count) + __device_links_supplier_defer_sync(dev); + else + __device_links_queue_sync_state(dev, &sync_list); + list_for_each_entry(link, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; @@ -883,7 +896,7 @@ void device_links_driver_bound(struct device *dev) device_links_write_unlock(); - device_links_flush_sync_list(&sync_list); + device_links_flush_sync_list(&sync_list, dev); } static void device_link_drop_managed(struct device_link *link) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 7fa654f1288b..b5ce7b085795 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -363,10 +363,10 @@ static void setup_pdev_dma_masks(struct platform_device *pdev) { if (!pdev->dev.coherent_dma_mask) pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); - if (!pdev->dma_mask) - pdev->dma_mask = DMA_BIT_MASK(32); - if (!pdev->dev.dma_mask) - pdev->dev.dma_mask = &pdev->dma_mask; + if (!pdev->dev.dma_mask) { + pdev->platform_dma_mask = DMA_BIT_MASK(32); + pdev->dev.dma_mask = &pdev->platform_dma_mask; + } }; /** @@ -662,20 +662,8 @@ struct platform_device *platform_device_register_full( pdev->dev.of_node_reused = pdevinfo->of_node_reused; if (pdevinfo->dma_mask) { - /* - * This memory isn't freed when the device is put, - * I don't have a nice idea for that though. Conceptually - * dma_mask in struct device should not be a pointer. - * See http://thread.gmane.org/gmane.linux.kernel.pci/9081 - */ - pdev->dev.dma_mask = - kmalloc(sizeof(*pdev->dev.dma_mask), GFP_KERNEL); - if (!pdev->dev.dma_mask) - goto err; - - kmemleak_ignore(pdev->dev.dma_mask); - - *pdev->dev.dma_mask = pdevinfo->dma_mask; + pdev->platform_dma_mask = pdevinfo->dma_mask; + pdev->dev.dma_mask = &pdev->platform_dma_mask; pdev->dev.coherent_dma_mask = pdevinfo->dma_mask; } @@ -700,7 +688,6 @@ struct platform_device *platform_device_register_full( if (ret) { err: ACPI_COMPANION_SET(&pdev->dev, NULL); - kfree(pdev->dev.dma_mask); platform_device_put(pdev); return ERR_PTR(ret); } diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 0b081dee1e95..de8d3543e8fe 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -608,6 +608,13 @@ static void software_node_release(struct kobject *kobj) { struct swnode *swnode = kobj_to_swnode(kobj); + if (swnode->parent) { + ida_simple_remove(&swnode->parent->child_ids, swnode->id); + list_del(&swnode->entry); + } else { + ida_simple_remove(&swnode_root_ids, swnode->id); + } + if (swnode->allocated) { property_entries_free(swnode->node->properties); kfree(swnode->node); @@ -773,13 +780,6 @@ void fwnode_remove_software_node(struct fwnode_handle *fwnode) if (!swnode) return; - if (swnode->parent) { - ida_simple_remove(&swnode->parent->child_ids, swnode->id); - list_del(&swnode->entry); - } else { - ida_simple_remove(&swnode_root_ids, swnode->id); - } - kobject_put(&swnode->kobj); } EXPORT_SYMBOL_GPL(fwnode_remove_software_node); diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index bc837862b767..62b660821dbc 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -14,9 +14,6 @@ #include <linux/fault-inject.h> struct nullb_cmd { - struct list_head list; - struct llist_node ll_list; - struct __call_single_data csd; struct request *rq; struct bio *bio; unsigned int tag; diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 16510795e377..133060431dbd 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1518,8 +1518,6 @@ static int setup_commands(struct nullb_queue *nq) for (i = 0; i < nq->queue_depth; i++) { cmd = &nq->cmds[i]; - INIT_LIST_HEAD(&cmd->list); - cmd->ll_list.next = NULL; cmd->tag = -1U; } diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 117cfc8cd05a..cda5cf917e9a 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -276,7 +276,7 @@ static const struct block_device_operations pcd_bdops = { .release = pcd_block_release, .ioctl = pcd_block_ioctl, #ifdef CONFIG_COMPAT - .ioctl = blkdev_compat_ptr_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, #endif .check_events = pcd_block_check_events, }; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 54158766334b..0736248999b0 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -245,13 +245,20 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); - blk_mq_stop_hw_queue(hctx); + /* Don't stop the queue if -ENOMEM: we may have failed to + * bounce the buffer due to global resource outage. + */ + if (err == -ENOSPC) + blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); - /* Out of mem doesn't actually happen, since we fall back - * to direct descriptors */ - if (err == -ENOMEM || err == -ENOSPC) + switch (err) { + case -ENOSPC: return BLK_STS_DEV_RESOURCE; - return BLK_STS_IOERR; + case -ENOMEM: + return BLK_STS_RESOURCE; + default: + return BLK_STS_IOERR; + } } if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index e2ad6bba2281..9df516a56bb2 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -213,6 +213,7 @@ struct blkfront_info struct blk_mq_tag_set tag_set; struct blkfront_ring_info *rinfo; unsigned int nr_rings; + unsigned int rinfo_size; /* Save uncomplete reqs and bios for migration. */ struct list_head requests; struct bio_list bio_list; @@ -259,6 +260,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo); static void blkfront_gather_backend_features(struct blkfront_info *info); static int negotiate_mq(struct blkfront_info *info); +#define for_each_rinfo(info, ptr, idx) \ + for ((ptr) = (info)->rinfo, (idx) = 0; \ + (idx) < (info)->nr_rings; \ + (idx)++, (ptr) = (void *)(ptr) + (info)->rinfo_size) + +static inline struct blkfront_ring_info * +get_rinfo(const struct blkfront_info *info, unsigned int i) +{ + BUG_ON(i >= info->nr_rings); + return (void *)info->rinfo + i * info->rinfo_size; +} + static int get_id_from_freelist(struct blkfront_ring_info *rinfo) { unsigned long free = rinfo->shadow_free; @@ -883,8 +896,7 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx, struct blkfront_info *info = hctx->queue->queuedata; struct blkfront_ring_info *rinfo = NULL; - BUG_ON(info->nr_rings <= qid); - rinfo = &info->rinfo[qid]; + rinfo = get_rinfo(info, qid); blk_mq_start_request(qd->rq); spin_lock_irqsave(&rinfo->ring_lock, flags); if (RING_FULL(&rinfo->ring)) @@ -1181,6 +1193,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, static void xlvbd_release_gendisk(struct blkfront_info *info) { unsigned int minor, nr_minors, i; + struct blkfront_ring_info *rinfo; if (info->rq == NULL) return; @@ -1188,9 +1201,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info) /* No more blkif_request(). */ blk_mq_stop_hw_queues(info->rq); - for (i = 0; i < info->nr_rings; i++) { - struct blkfront_ring_info *rinfo = &info->rinfo[i]; - + for_each_rinfo(info, rinfo, i) { /* No more gnttab callback work. */ gnttab_cancel_free_callback(&rinfo->callback); @@ -1339,6 +1350,7 @@ free_shadow: static void blkif_free(struct blkfront_info *info, int suspend) { unsigned int i; + struct blkfront_ring_info *rinfo; /* Prevent new requests being issued until we fix things up. */ info->connected = suspend ? @@ -1347,8 +1359,8 @@ static void blkif_free(struct blkfront_info *info, int suspend) if (info->rq) blk_mq_stop_hw_queues(info->rq); - for (i = 0; i < info->nr_rings; i++) - blkif_free_ring(&info->rinfo[i]); + for_each_rinfo(info, rinfo, i) + blkif_free_ring(rinfo); kvfree(info->rinfo); info->rinfo = NULL; @@ -1775,6 +1787,7 @@ static int talk_to_blkback(struct xenbus_device *dev, int err; unsigned int i, max_page_order; unsigned int ring_page_order; + struct blkfront_ring_info *rinfo; if (!info) return -ENODEV; @@ -1788,9 +1801,7 @@ static int talk_to_blkback(struct xenbus_device *dev, if (err) goto destroy_blkring; - for (i = 0; i < info->nr_rings; i++) { - struct blkfront_ring_info *rinfo = &info->rinfo[i]; - + for_each_rinfo(info, rinfo, i) { /* Create shared ring, alloc event channel. */ err = setup_blkring(dev, rinfo); if (err) @@ -1815,7 +1826,7 @@ again: /* We already got the number of queues/rings in _probe */ if (info->nr_rings == 1) { - err = write_per_ring_nodes(xbt, &info->rinfo[0], dev->nodename); + err = write_per_ring_nodes(xbt, info->rinfo, dev->nodename); if (err) goto destroy_blkring; } else { @@ -1837,10 +1848,10 @@ again: goto abort_transaction; } - for (i = 0; i < info->nr_rings; i++) { + for_each_rinfo(info, rinfo, i) { memset(path, 0, pathsize); snprintf(path, pathsize, "%s/queue-%u", dev->nodename, i); - err = write_per_ring_nodes(xbt, &info->rinfo[i], path); + err = write_per_ring_nodes(xbt, rinfo, path); if (err) { kfree(path); goto destroy_blkring; @@ -1868,9 +1879,8 @@ again: goto destroy_blkring; } - for (i = 0; i < info->nr_rings; i++) { + for_each_rinfo(info, rinfo, i) { unsigned int j; - struct blkfront_ring_info *rinfo = &info->rinfo[i]; for (j = 0; j < BLK_RING_SIZE(info); j++) rinfo->shadow[j].req.u.rw.id = j + 1; @@ -1900,6 +1910,7 @@ static int negotiate_mq(struct blkfront_info *info) { unsigned int backend_max_queues; unsigned int i; + struct blkfront_ring_info *rinfo; BUG_ON(info->nr_rings); @@ -1911,20 +1922,16 @@ static int negotiate_mq(struct blkfront_info *info) if (!info->nr_rings) info->nr_rings = 1; - info->rinfo = kvcalloc(info->nr_rings, - struct_size(info->rinfo, shadow, - BLK_RING_SIZE(info)), - GFP_KERNEL); + info->rinfo_size = struct_size(info->rinfo, shadow, + BLK_RING_SIZE(info)); + info->rinfo = kvcalloc(info->nr_rings, info->rinfo_size, GFP_KERNEL); if (!info->rinfo) { xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure"); info->nr_rings = 0; return -ENOMEM; } - for (i = 0; i < info->nr_rings; i++) { - struct blkfront_ring_info *rinfo; - - rinfo = &info->rinfo[i]; + for_each_rinfo(info, rinfo, i) { INIT_LIST_HEAD(&rinfo->indirect_pages); INIT_LIST_HEAD(&rinfo->grants); rinfo->dev_info = info; @@ -2017,6 +2024,7 @@ static int blkif_recover(struct blkfront_info *info) int rc; struct bio *bio; unsigned int segs; + struct blkfront_ring_info *rinfo; blkfront_gather_backend_features(info); /* Reset limits changed by blk_mq_update_nr_hw_queues(). */ @@ -2024,9 +2032,7 @@ static int blkif_recover(struct blkfront_info *info) segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG); - for (r_index = 0; r_index < info->nr_rings; r_index++) { - struct blkfront_ring_info *rinfo = &info->rinfo[r_index]; - + for_each_rinfo(info, rinfo, r_index) { rc = blkfront_setup_indirect(rinfo); if (rc) return rc; @@ -2036,10 +2042,7 @@ static int blkif_recover(struct blkfront_info *info) /* Now safe for us to use the shared ring */ info->connected = BLKIF_STATE_CONNECTED; - for (r_index = 0; r_index < info->nr_rings; r_index++) { - struct blkfront_ring_info *rinfo; - - rinfo = &info->rinfo[r_index]; + for_each_rinfo(info, rinfo, r_index) { /* Kick any other new requests queued since we resumed */ kick_pending_request_queues(rinfo); } @@ -2072,13 +2075,13 @@ static int blkfront_resume(struct xenbus_device *dev) struct blkfront_info *info = dev_get_drvdata(&dev->dev); int err = 0; unsigned int i, j; + struct blkfront_ring_info *rinfo; dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); bio_list_init(&info->bio_list); INIT_LIST_HEAD(&info->requests); - for (i = 0; i < info->nr_rings; i++) { - struct blkfront_ring_info *rinfo = &info->rinfo[i]; + for_each_rinfo(info, rinfo, i) { struct bio_list merge_bio; struct blk_shadow *shadow = rinfo->shadow; @@ -2337,6 +2340,7 @@ static void blkfront_connect(struct blkfront_info *info) unsigned int binfo; char *envp[] = { "RESIZE=1", NULL }; int err, i; + struct blkfront_ring_info *rinfo; switch (info->connected) { case BLKIF_STATE_CONNECTED: @@ -2394,8 +2398,8 @@ static void blkfront_connect(struct blkfront_info *info) "physical-sector-size", sector_size); blkfront_gather_backend_features(info); - for (i = 0; i < info->nr_rings; i++) { - err = blkfront_setup_indirect(&info->rinfo[i]); + for_each_rinfo(info, rinfo, i) { + err = blkfront_setup_indirect(rinfo); if (err) { xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s", info->xbdev->otherend); @@ -2416,8 +2420,8 @@ static void blkfront_connect(struct blkfront_info *info) /* Kick pending requests. */ info->connected = BLKIF_STATE_CONNECTED; - for (i = 0; i < info->nr_rings; i++) - kick_pending_request_queues(&info->rinfo[i]); + for_each_rinfo(info, rinfo, i) + kick_pending_request_queues(rinfo); device_add_disk(&info->xbdev->dev, info->gd, NULL); @@ -2652,9 +2656,9 @@ static void purge_persistent_grants(struct blkfront_info *info) { unsigned int i; unsigned long flags; + struct blkfront_ring_info *rinfo; - for (i = 0; i < info->nr_rings; i++) { - struct blkfront_ring_info *rinfo = &info->rinfo[i]; + for_each_rinfo(info, rinfo, i) { struct grant *gnt_list_entry, *tmp; spin_lock_irqsave(&rinfo->ring_lock, flags); diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index f7aa2dc1ff85..4e73a531b377 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -211,12 +211,12 @@ config BT_HCIUART_RTL depends on BT_HCIUART depends on BT_HCIUART_SERDEV depends on GPIOLIB - depends on ACPI + depends on (ACPI || SERIAL_DEV_CTRL_TTYPORT) select BT_HCIUART_3WIRE select BT_RTL help The Realtek protocol support enables Bluetooth HCI over 3-Wire - serial port internface for Realtek Bluetooth controllers. + serial port interface for Realtek Bluetooth controllers. Say Y here to compile support for Realtek protocol. diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index 0e5954cac98e..5a321b4076aa 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -133,8 +133,8 @@ static int bfusb_send_bulk(struct bfusb_data *data, struct sk_buff *skb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { - BT_ERR("%s bulk tx submit failed urb %p err %d", - data->hdev->name, urb, err); + bt_dev_err(data->hdev, "bulk tx submit failed urb %p err %d", + urb, err); skb_unlink(skb, &data->pending_q); usb_free_urb(urb); } else @@ -232,8 +232,8 @@ static int bfusb_rx_submit(struct bfusb_data *data, struct urb *urb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { - BT_ERR("%s bulk rx submit failed urb %p err %d", - data->hdev->name, urb, err); + bt_dev_err(data->hdev, "bulk rx submit failed urb %p err %d", + urb, err); skb_unlink(skb, &data->pending_q); kfree_skb(skb); usb_free_urb(urb); @@ -247,7 +247,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch BT_DBG("bfusb %p hdr 0x%02x data %p len %d", data, hdr, buf, len); if (hdr & 0x10) { - BT_ERR("%s error in block", data->hdev->name); + bt_dev_err(data->hdev, "error in block"); kfree_skb(data->reassembly); data->reassembly = NULL; return -EIO; @@ -259,13 +259,13 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch int pkt_len = 0; if (data->reassembly) { - BT_ERR("%s unexpected start block", data->hdev->name); + bt_dev_err(data->hdev, "unexpected start block"); kfree_skb(data->reassembly); data->reassembly = NULL; } if (len < 1) { - BT_ERR("%s no packet type found", data->hdev->name); + bt_dev_err(data->hdev, "no packet type found"); return -EPROTO; } @@ -277,7 +277,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch struct hci_event_hdr *hdr = (struct hci_event_hdr *) buf; pkt_len = HCI_EVENT_HDR_SIZE + hdr->plen; } else { - BT_ERR("%s event block is too short", data->hdev->name); + bt_dev_err(data->hdev, "event block is too short"); return -EILSEQ; } break; @@ -287,7 +287,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch struct hci_acl_hdr *hdr = (struct hci_acl_hdr *) buf; pkt_len = HCI_ACL_HDR_SIZE + __le16_to_cpu(hdr->dlen); } else { - BT_ERR("%s data block is too short", data->hdev->name); + bt_dev_err(data->hdev, "data block is too short"); return -EILSEQ; } break; @@ -297,7 +297,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch struct hci_sco_hdr *hdr = (struct hci_sco_hdr *) buf; pkt_len = HCI_SCO_HDR_SIZE + hdr->dlen; } else { - BT_ERR("%s audio block is too short", data->hdev->name); + bt_dev_err(data->hdev, "audio block is too short"); return -EILSEQ; } break; @@ -305,7 +305,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch skb = bt_skb_alloc(pkt_len, GFP_ATOMIC); if (!skb) { - BT_ERR("%s no memory for the packet", data->hdev->name); + bt_dev_err(data->hdev, "no memory for the packet"); return -ENOMEM; } @@ -314,7 +314,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch data->reassembly = skb; } else { if (!data->reassembly) { - BT_ERR("%s unexpected continuation block", data->hdev->name); + bt_dev_err(data->hdev, "unexpected continuation block"); return -EIO; } } @@ -366,8 +366,7 @@ static void bfusb_rx_complete(struct urb *urb) } if (count < len) { - BT_ERR("%s block extends over URB buffer ranges", - data->hdev->name); + bt_dev_err(data->hdev, "block extends over URB buffer ranges"); } if ((hdr & 0xe1) == 0xc1) @@ -391,8 +390,8 @@ resubmit: err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { - BT_ERR("%s bulk resubmit failed urb %p err %d", - data->hdev->name, urb, err); + bt_dev_err(data->hdev, "bulk resubmit failed urb %p err %d", + urb, err); } unlock: @@ -477,7 +476,7 @@ static int bfusb_send_frame(struct hci_dev *hdev, struct sk_buff *skb) /* Max HCI frame size seems to be 1511 + 1 */ nskb = bt_skb_alloc(count + 32, GFP_KERNEL); if (!nskb) { - BT_ERR("Can't allocate memory for new packet"); + bt_dev_err(hdev, "Can't allocate memory for new packet"); return -ENOMEM; } diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 62e781a18bf0..6a0e2c5a8beb 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -376,13 +376,13 @@ struct ibt_cp_reg_access { __le32 addr; __u8 mode; __u8 len; - __u8 data[0]; + __u8 data[]; } __packed; struct ibt_rp_reg_access { __u8 status; __le32 addr; - __u8 data[0]; + __u8 data[]; } __packed; static int regmap_ibt_read(void *context, const void *addr, size_t reg_size, diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index ec69e5dd7bd3..a16845c0751d 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -139,7 +139,7 @@ int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd); static void qca_tlv_check_data(struct qca_fw_config *config, - const struct firmware *fw) + const struct firmware *fw, enum qca_btsoc_type soc_type) { const u8 *data; u32 type_len; @@ -148,6 +148,7 @@ static void qca_tlv_check_data(struct qca_fw_config *config, struct tlv_type_hdr *tlv; struct tlv_type_patch *tlv_patch; struct tlv_type_nvm *tlv_nvm; + uint8_t nvm_baud_rate = config->user_baud_rate; tlv = (struct tlv_type_hdr *)fw->data; @@ -216,7 +217,10 @@ static void qca_tlv_check_data(struct qca_fw_config *config, tlv_nvm->data[0] |= 0x80; /* UART Baud Rate */ - tlv_nvm->data[2] = config->user_baud_rate; + if (soc_type == QCA_WCN3991) + tlv_nvm->data[1] = nvm_baud_rate; + else + tlv_nvm->data[2] = nvm_baud_rate; break; @@ -354,7 +358,7 @@ static int qca_download_firmware(struct hci_dev *hdev, return ret; } - qca_tlv_check_data(config, fw); + qca_tlv_check_data(config, fw, soc_type); segment = fw->data; remain = fw->size; diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index f5795b1a3779..e16a4d650597 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -79,7 +79,7 @@ struct qca_fw_config { struct edl_event_hdr { __u8 cresp; __u8 rtype; - __u8 data[0]; + __u8 data[]; } __packed; struct qca_btsoc_version { @@ -112,12 +112,12 @@ struct tlv_type_nvm { __le16 tag_len; __le32 reserve1; __le32 reserve2; - __u8 data[0]; + __u8 data[]; } __packed; struct tlv_type_hdr { __le32 type_len; - __u8 data[0]; + __u8 data[]; } __packed; enum qca_btsoc_type { diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 577cfa3329db..67f4bc21e7c5 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -136,6 +136,18 @@ static const struct id_table ic_id_table[] = { .fw_name = "rtl_bt/rtl8761a_fw.bin", .cfg_name = "rtl_bt/rtl8761a_config" }, + /* 8822C with UART interface */ + { .match_flags = IC_MATCH_FL_LMPSUBV | IC_MATCH_FL_HCIREV | + IC_MATCH_FL_HCIBUS, + .lmp_subver = RTL_ROM_LMP_8822B, + .hci_rev = 0x000c, + .hci_ver = 0x0a, + .hci_bus = HCI_UART, + .config_needed = true, + .has_rom_version = true, + .fw_name = "rtl_bt/rtl8822cs_fw.bin", + .cfg_name = "rtl_bt/rtl8822cs_config" }, + /* 8822C with USB interface */ { IC_INFO(RTL_ROM_LMP_8822B, 0xc), .config_needed = false, diff --git a/drivers/bluetooth/btrtl.h b/drivers/bluetooth/btrtl.h index 10ad40c3e42c..2a582682136d 100644 --- a/drivers/bluetooth/btrtl.h +++ b/drivers/bluetooth/btrtl.h @@ -38,13 +38,13 @@ struct rtl_epatch_header { struct rtl_vendor_config_entry { __le16 offset; __u8 len; - __u8 data[0]; + __u8 data[]; } __packed; struct rtl_vendor_config { __le32 signature; __le16 total_len; - struct rtl_vendor_config_entry entry[0]; + struct rtl_vendor_config_entry entry[]; } __packed; #if IS_ENABLED(CONFIG_BT_RTL) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index f5924f3e8b8d..3bdec42c9612 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -57,6 +57,7 @@ static struct usb_driver btusb_driver; #define BTUSB_IFNUM_2 0x80000 #define BTUSB_CW6622 0x100000 #define BTUSB_MEDIATEK 0x200000 +#define BTUSB_WIDEBAND_SPEECH 0x400000 static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -333,15 +334,21 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x1286, 0x204e), .driver_info = BTUSB_MARVELL }, /* Intel Bluetooth devices */ - { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW }, - { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW }, - { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_NEW }, + { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_NEW | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL }, - { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_NEW }, - { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL }, - { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_NEW }, + { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_NEW | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_NEW | + BTUSB_WIDEBAND_SPEECH }, /* Other Intel Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01), @@ -387,6 +394,7 @@ static const struct usb_device_id blacklist_table[] = { /* Additional Realtek 8822CE Bluetooth devices */ { USB_DEVICE(0x04ca, 0x4005), .driver_info = BTUSB_REALTEK }, + { USB_DEVICE(0x13d3, 0x3548), .driver_info = BTUSB_REALTEK }, /* Silicon Wave based devices */ { USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE }, @@ -1930,7 +1938,14 @@ static int btusb_setup_intel(struct hci_dev *hdev) if (err) return err; - bt_dev_info(hdev, "Intel firmware patch completed and activated"); + /* Need build number for downloaded fw patches in + * every power-on boot + */ + err = btintel_read_version(hdev, &ver); + if (err) + return err; + bt_dev_info(hdev, "Intel BT fw patch 0x%02x completed & activated", + ver.fw_patch_num); goto complete; @@ -3859,6 +3874,9 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_BROKEN_ISOC) data->isoc = NULL; + if (id->driver_info & BTUSB_WIDEBAND_SPEECH) + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); diff --git a/drivers/bluetooth/hci_ag6xx.c b/drivers/bluetooth/hci_ag6xx.c index 8bafa650b5b0..1f55df93e4ce 100644 --- a/drivers/bluetooth/hci_ag6xx.c +++ b/drivers/bluetooth/hci_ag6xx.c @@ -27,7 +27,7 @@ struct ag6xx_data { struct pbn_entry { __le32 addr; __le32 plen; - __u8 data[0]; + __u8 data[]; } __packed; static int ag6xx_open(struct hci_uart *hu) diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 6dc1fbeb564b..4b3b14a34794 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -71,8 +71,6 @@ static int h4_close(struct hci_uart *hu) { struct h4_struct *h4 = hu->priv; - hu->priv = NULL; - BT_DBG("hu %p", hu); skb_queue_purge(&h4->txq); @@ -85,7 +83,7 @@ static int h4_close(struct hci_uart *hu) return 0; } -/* Enqueue frame for transmittion (padding, crc, etc) */ +/* Enqueue frame for transmission (padding, crc, etc) */ static int h4_enqueue(struct hci_uart *hu, struct sk_buff *skb) { struct h4_struct *h4 = hu->priv; diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index 0b14547482a7..106c110efe56 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -11,6 +11,7 @@ #include <linux/gpio/consumer.h> #include <linux/kernel.h> #include <linux/mod_devicetable.h> +#include <linux/of_device.h> #include <linux/serdev.h> #include <linux/skbuff.h> @@ -177,7 +178,7 @@ static void h5_peer_reset(struct hci_uart *hu) { struct h5 *h5 = hu->priv; - BT_ERR("Peer device has reset"); + bt_dev_err(hu->hdev, "Peer device has reset"); h5->state = H5_UNINITIALIZED; @@ -437,21 +438,21 @@ static int h5_rx_3wire_hdr(struct hci_uart *hu, unsigned char c) H5_HDR_LEN(hdr)); if (((hdr[0] + hdr[1] + hdr[2] + hdr[3]) & 0xff) != 0xff) { - BT_ERR("Invalid header checksum"); + bt_dev_err(hu->hdev, "Invalid header checksum"); h5_reset_rx(h5); return 0; } if (H5_HDR_RELIABLE(hdr) && H5_HDR_SEQ(hdr) != h5->tx_ack) { - BT_ERR("Out-of-order packet arrived (%u != %u)", - H5_HDR_SEQ(hdr), h5->tx_ack); + bt_dev_err(hu->hdev, "Out-of-order packet arrived (%u != %u)", + H5_HDR_SEQ(hdr), h5->tx_ack); h5_reset_rx(h5); return 0; } if (h5->state != H5_ACTIVE && H5_HDR_PKT_TYPE(hdr) != HCI_3WIRE_LINK_PKT) { - BT_ERR("Non-link packet received in non-active state"); + bt_dev_err(hu->hdev, "Non-link packet received in non-active state"); h5_reset_rx(h5); return 0; } @@ -474,7 +475,7 @@ static int h5_rx_pkt_start(struct hci_uart *hu, unsigned char c) h5->rx_skb = bt_skb_alloc(H5_MAX_LEN, GFP_ATOMIC); if (!h5->rx_skb) { - BT_ERR("Can't allocate mem for new packet"); + bt_dev_err(hu->hdev, "Can't allocate mem for new packet"); h5_reset_rx(h5); return -ENOMEM; } @@ -550,7 +551,7 @@ static int h5_recv(struct hci_uart *hu, const void *data, int count) if (h5->rx_pending > 0) { if (*ptr == SLIP_DELIMITER) { - BT_ERR("Too short H5 packet"); + bt_dev_err(hu->hdev, "Too short H5 packet"); h5_reset_rx(h5); continue; } @@ -577,13 +578,13 @@ static int h5_enqueue(struct hci_uart *hu, struct sk_buff *skb) struct h5 *h5 = hu->priv; if (skb->len > 0xfff) { - BT_ERR("Packet too long (%u bytes)", skb->len); + bt_dev_err(hu->hdev, "Packet too long (%u bytes)", skb->len); kfree_skb(skb); return 0; } if (h5->state != H5_ACTIVE) { - BT_ERR("Ignoring HCI data in non-active state"); + bt_dev_err(hu->hdev, "Ignoring HCI data in non-active state"); kfree_skb(skb); return 0; } @@ -600,7 +601,7 @@ static int h5_enqueue(struct hci_uart *hu, struct sk_buff *skb) break; default: - BT_ERR("Unknown packet type %u", hci_skb_pkt_type(skb)); + bt_dev_err(hu->hdev, "Unknown packet type %u", hci_skb_pkt_type(skb)); kfree_skb(skb); break; } @@ -656,7 +657,7 @@ static struct sk_buff *h5_prepare_pkt(struct hci_uart *hu, u8 pkt_type, int i; if (!valid_packet_type(pkt_type)) { - BT_ERR("Unknown packet type %u", pkt_type); + bt_dev_err(hu->hdev, "Unknown packet type %u", pkt_type); return NULL; } @@ -733,7 +734,7 @@ static struct sk_buff *h5_dequeue(struct hci_uart *hu) } skb_queue_head(&h5->unrel, skb); - BT_ERR("Could not dequeue pkt because alloc_skb failed"); + bt_dev_err(hu->hdev, "Could not dequeue pkt because alloc_skb failed"); } spin_lock_irqsave_nested(&h5->unack.lock, flags, SINGLE_DEPTH_NESTING); @@ -753,7 +754,7 @@ static struct sk_buff *h5_dequeue(struct hci_uart *hu) } skb_queue_head(&h5->rel, skb); - BT_ERR("Could not dequeue pkt because alloc_skb failed"); + bt_dev_err(hu->hdev, "Could not dequeue pkt because alloc_skb failed"); } unlock: @@ -785,7 +786,6 @@ static const struct hci_uart_proto h5p = { static int h5_serdev_probe(struct serdev_device *serdev) { - const struct acpi_device_id *match; struct device *dev = &serdev->dev; struct h5 *h5; @@ -800,6 +800,8 @@ static int h5_serdev_probe(struct serdev_device *serdev) serdev_device_set_drvdata(serdev, h5); if (has_acpi_companion(dev)) { + const struct acpi_device_id *match; + match = acpi_match_device(dev->driver->acpi_match_table, dev); if (!match) return -ENODEV; @@ -810,8 +812,17 @@ static int h5_serdev_probe(struct serdev_device *serdev) if (h5->vnd->acpi_gpio_map) devm_acpi_dev_add_driver_gpios(dev, h5->vnd->acpi_gpio_map); + } else { + const void *data; + + data = of_device_get_match_data(dev); + if (!data) + return -ENODEV; + + h5->vnd = (const struct h5_vnd *)data; } + h5->enable_gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(h5->enable_gpio)) return PTR_ERR(h5->enable_gpio); @@ -1003,6 +1014,15 @@ static const struct dev_pm_ops h5_serdev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(h5_serdev_suspend, h5_serdev_resume) }; +static const struct of_device_id rtl_bluetooth_of_match[] = { +#ifdef CONFIG_BT_HCIUART_RTL + { .compatible = "realtek,rtl8822cs-bt", + .data = (const void *)&rtl_vnd }, +#endif + { }, +}; +MODULE_DEVICE_TABLE(of, rtl_bluetooth_of_match); + static struct serdev_device_driver h5_serdev_driver = { .probe = h5_serdev_probe, .remove = h5_serdev_remove, @@ -1010,6 +1030,7 @@ static struct serdev_device_driver h5_serdev_driver = { .name = "hci_uart_h5", .acpi_match_table = ACPI_PTR(h5_acpi_match), .pm = &h5_serdev_pm_ops, + .of_match_table = rtl_bluetooth_of_match, }, }; diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 31f25153087d..f1299da6eed8 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -49,7 +49,7 @@ struct hci_lpm_pkt { __u8 opcode; __u8 dlen; - __u8 data[0]; + __u8 data[]; } __packed; struct intel_device { diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index d6e0c99ee5eb..439392b1c043 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -29,6 +29,7 @@ #include <linux/platform_device.h> #include <linux/regulator/consumer.h> #include <linux/serdev.h> +#include <linux/mutex.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> @@ -69,7 +70,8 @@ enum qca_flags { QCA_IBS_ENABLED, QCA_DROP_VENDOR_EVENT, QCA_SUSPENDING, - QCA_MEMDUMP_COLLECTION + QCA_MEMDUMP_COLLECTION, + QCA_HW_ERROR_EVENT }; @@ -138,18 +140,19 @@ struct qca_data { u32 tx_idle_delay; struct timer_list wake_retrans_timer; u32 wake_retrans; - struct timer_list memdump_timer; struct workqueue_struct *workqueue; struct work_struct ws_awake_rx; struct work_struct ws_awake_device; struct work_struct ws_rx_vote_off; struct work_struct ws_tx_vote_off; struct work_struct ctrl_memdump_evt; + struct delayed_work ctrl_memdump_timeout; struct qca_memdump_data *qca_memdump; unsigned long flags; struct completion drop_ev_comp; wait_queue_head_t suspend_wait_q; enum qca_memdump_states memdump_state; + struct mutex hci_memdump_lock; /* For debugging purpose */ u64 ibs_sent_wacks; @@ -522,23 +525,28 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t) hci_uart_tx_wakeup(hu); } -static void hci_memdump_timeout(struct timer_list *t) + +static void qca_controller_memdump_timeout(struct work_struct *work) { - struct qca_data *qca = from_timer(qca, t, tx_idle_timer); + struct qca_data *qca = container_of(work, struct qca_data, + ctrl_memdump_timeout.work); struct hci_uart *hu = qca->hu; - struct qca_memdump_data *qca_memdump = qca->qca_memdump; - char *memdump_buf = qca_memdump->memdump_buf_tail; - - bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout"); - /* Inject hw error event to reset the device and driver. */ - hci_reset_dev(hu->hdev); - vfree(memdump_buf); - kfree(qca_memdump); - qca->memdump_state = QCA_MEMDUMP_TIMEOUT; - del_timer(&qca->memdump_timer); - cancel_work_sync(&qca->ctrl_memdump_evt); + + mutex_lock(&qca->hci_memdump_lock); + if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) { + qca->memdump_state = QCA_MEMDUMP_TIMEOUT; + if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) { + /* Inject hw error event to reset the device + * and driver. + */ + hci_reset_dev(hu->hdev); + } + } + + mutex_unlock(&qca->hci_memdump_lock); } + /* Initialize protocol */ static int qca_open(struct hci_uart *hu) { @@ -558,6 +566,7 @@ static int qca_open(struct hci_uart *hu) skb_queue_head_init(&qca->tx_wait_q); skb_queue_head_init(&qca->rx_memdump_q); spin_lock_init(&qca->hci_ibs_lock); + mutex_init(&qca->hci_memdump_lock); qca->workqueue = alloc_ordered_workqueue("qca_wq", 0); if (!qca->workqueue) { BT_ERR("QCA Workqueue not initialized properly"); @@ -570,6 +579,8 @@ static int qca_open(struct hci_uart *hu) INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off); INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off); INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump); + INIT_DELAYED_WORK(&qca->ctrl_memdump_timeout, + qca_controller_memdump_timeout); init_waitqueue_head(&qca->suspend_wait_q); qca->hu = hu; @@ -596,7 +607,6 @@ static int qca_open(struct hci_uart *hu) timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0); qca->tx_idle_delay = IBS_HOST_TX_IDLE_TIMEOUT_MS; - timer_setup(&qca->memdump_timer, hci_memdump_timeout, 0); BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u", qca->tx_idle_delay, qca->wake_retrans); @@ -677,7 +687,6 @@ static int qca_close(struct hci_uart *hu) skb_queue_purge(&qca->rx_memdump_q); del_timer(&qca->tx_idle_timer); del_timer(&qca->wake_retrans_timer); - del_timer(&qca->memdump_timer); destroy_workqueue(qca->workqueue); qca->hu = NULL; @@ -963,11 +972,20 @@ static void qca_controller_memdump(struct work_struct *work) while ((skb = skb_dequeue(&qca->rx_memdump_q))) { + mutex_lock(&qca->hci_memdump_lock); + /* Skip processing the received packets if timeout detected. */ + if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) { + mutex_unlock(&qca->hci_memdump_lock); + return; + } + if (!qca_memdump) { qca_memdump = kzalloc(sizeof(struct qca_memdump_data), GFP_ATOMIC); - if (!qca_memdump) + if (!qca_memdump) { + mutex_unlock(&qca->hci_memdump_lock); return; + } qca->qca_memdump = qca_memdump; } @@ -992,13 +1010,15 @@ static void qca_controller_memdump(struct work_struct *work) if (!(dump_size)) { bt_dev_err(hu->hdev, "Rx invalid memdump size"); kfree_skb(skb); + mutex_unlock(&qca->hci_memdump_lock); return; } bt_dev_info(hu->hdev, "QCA collecting dump of size:%u", dump_size); - mod_timer(&qca->memdump_timer, (jiffies + - msecs_to_jiffies(MEMDUMP_TIMEOUT_MS))); + queue_delayed_work(qca->workqueue, + &qca->ctrl_memdump_timeout, + msecs_to_jiffies(MEMDUMP_TIMEOUT_MS)); skb_pull(skb, sizeof(dump_size)); memdump_buf = vmalloc(dump_size); @@ -1016,6 +1036,7 @@ static void qca_controller_memdump(struct work_struct *work) kfree(qca_memdump); kfree_skb(skb); qca->qca_memdump = NULL; + mutex_unlock(&qca->hci_memdump_lock); return; } @@ -1046,16 +1067,20 @@ static void qca_controller_memdump(struct work_struct *work) memdump_buf = qca_memdump->memdump_buf_head; dev_coredumpv(&hu->serdev->dev, memdump_buf, qca_memdump->received_dump, GFP_KERNEL); - del_timer(&qca->memdump_timer); + cancel_delayed_work(&qca->ctrl_memdump_timeout); kfree(qca->qca_memdump); qca->qca_memdump = NULL; qca->memdump_state = QCA_MEMDUMP_COLLECTED; + clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); } + + mutex_unlock(&qca->hci_memdump_lock); } } -int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb) +static int qca_controller_memdump_event(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; @@ -1406,30 +1431,21 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; - struct qca_memdump_data *qca_memdump = qca->qca_memdump; - char *memdump_buf = NULL; wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION, TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS); clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); - if (qca->memdump_state == QCA_MEMDUMP_IDLE) { - bt_dev_err(hu->hdev, "Clearing the buffers due to timeout"); - if (qca_memdump) - memdump_buf = qca_memdump->memdump_buf_tail; - vfree(memdump_buf); - kfree(qca_memdump); - qca->memdump_state = QCA_MEMDUMP_TIMEOUT; - del_timer(&qca->memdump_timer); - cancel_work_sync(&qca->ctrl_memdump_evt); - } } static void qca_hw_error(struct hci_dev *hdev, u8 code) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; + struct qca_memdump_data *qca_memdump = qca->qca_memdump; + char *memdump_buf = NULL; + set_bit(QCA_HW_ERROR_EVENT, &qca->flags); bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state); if (qca->memdump_state == QCA_MEMDUMP_IDLE) { @@ -1449,6 +1465,23 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code) bt_dev_info(hdev, "waiting for dump to complete"); qca_wait_for_dump_collection(hdev); } + + if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) { + bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout"); + mutex_lock(&qca->hci_memdump_lock); + if (qca_memdump) + memdump_buf = qca_memdump->memdump_buf_head; + vfree(memdump_buf); + kfree(qca_memdump); + qca->qca_memdump = NULL; + qca->memdump_state = QCA_MEMDUMP_TIMEOUT; + cancel_delayed_work(&qca->ctrl_memdump_timeout); + skb_queue_purge(&qca->rx_memdump_q); + mutex_unlock(&qca->hci_memdump_lock); + cancel_work_sync(&qca->ctrl_memdump_evt); + } + + clear_bit(QCA_HW_ERROR_EVENT, &qca->flags); } static void qca_cmd_timeout(struct hci_dev *hdev) @@ -1529,9 +1562,11 @@ static int qca_power_on(struct hci_dev *hdev) ret = qca_wcn3990_init(hu); } else { qcadev = serdev_device_get_drvdata(hu->serdev); - gpiod_set_value_cansleep(qcadev->bt_en, 1); - /* Controller needs time to bootup. */ - msleep(150); + if (qcadev->bt_en) { + gpiod_set_value_cansleep(qcadev->bt_en, 1); + /* Controller needs time to bootup. */ + msleep(150); + } } return ret; @@ -1717,7 +1752,7 @@ static void qca_power_shutdown(struct hci_uart *hu) host_set_baudrate(hu, 2400); qca_send_power_pulse(hu, false); qca_regulator_disable(qcadev); - } else { + } else if (qcadev->bt_en) { gpiod_set_value_cansleep(qcadev->bt_en, 0); } } @@ -1726,9 +1761,11 @@ static int qca_power_off(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; + enum qca_btsoc_type soc_type = qca_soc_type(hu); /* Stop sending shutdown command if soc crashes. */ - if (qca->memdump_state == QCA_MEMDUMP_IDLE) { + if (qca_is_wcn399x(soc_type) + && qca->memdump_state == QCA_MEMDUMP_IDLE) { qca_send_pre_shutdown_cmd(hdev); usleep_range(8000, 10000); } @@ -1755,7 +1792,11 @@ static int qca_regulator_enable(struct qca_serdev *qcadev) power->vregs_on = true; - return 0; + ret = clk_prepare_enable(qcadev->susclk); + if (ret) + qca_regulator_disable(qcadev); + + return ret; } static void qca_regulator_disable(struct qca_serdev *qcadev) @@ -1773,6 +1814,8 @@ static void qca_regulator_disable(struct qca_serdev *qcadev) regulator_bulk_disable(power->num_vregs, power->vreg_bulk); power->vregs_on = false; + + clk_disable_unprepare(qcadev->susclk); } static int qca_init_regulators(struct qca_power *qca, @@ -1811,6 +1854,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) struct hci_dev *hdev; const struct qca_vreg_data *data; int err; + bool power_ctrl_enabled = true; qcadev = devm_kzalloc(&serdev->dev, sizeof(*qcadev), GFP_KERNEL); if (!qcadev) @@ -1839,6 +1883,12 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_power->vregs_on = false; + qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); + if (IS_ERR(qcadev->susclk)) { + dev_err(&serdev->dev, "failed to acquire clk\n"); + return PTR_ERR(qcadev->susclk); + } + device_property_read_u32(&serdev->dev, "max-speed", &qcadev->oper_speed); if (!qcadev->oper_speed) @@ -1851,38 +1901,40 @@ static int qca_serdev_probe(struct serdev_device *serdev) } } else { qcadev->btsoc_type = QCA_ROME; - qcadev->bt_en = devm_gpiod_get(&serdev->dev, "enable", + qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR(qcadev->bt_en)) { - dev_err(&serdev->dev, "failed to acquire enable gpio\n"); - return PTR_ERR(qcadev->bt_en); + if (!qcadev->bt_en) { + dev_warn(&serdev->dev, "failed to acquire enable gpio\n"); + power_ctrl_enabled = false; } - qcadev->susclk = devm_clk_get(&serdev->dev, NULL); - if (IS_ERR(qcadev->susclk)) { - dev_err(&serdev->dev, "failed to acquire clk\n"); - return PTR_ERR(qcadev->susclk); - } - - err = clk_set_rate(qcadev->susclk, SUSCLK_RATE_32KHZ); - if (err) - return err; + qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); + if (!qcadev->susclk) { + dev_warn(&serdev->dev, "failed to acquire clk\n"); + } else { + err = clk_set_rate(qcadev->susclk, SUSCLK_RATE_32KHZ); + if (err) + return err; - err = clk_prepare_enable(qcadev->susclk); - if (err) - return err; + err = clk_prepare_enable(qcadev->susclk); + if (err) + return err; + } err = hci_uart_register_device(&qcadev->serdev_hu, &qca_proto); if (err) { BT_ERR("Rome serdev registration failed"); - clk_disable_unprepare(qcadev->susclk); + if (qcadev->susclk) + clk_disable_unprepare(qcadev->susclk); return err; } } - hdev = qcadev->serdev_hu.hdev; - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); - hdev->shutdown = qca_power_off; + if (power_ctrl_enabled) { + hdev = qcadev->serdev_hu.hdev; + set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + hdev->shutdown = qca_power_off; + } return 0; } @@ -1893,7 +1945,7 @@ static void qca_serdev_remove(struct serdev_device *serdev) if (qca_is_wcn399x(qcadev->btsoc_type)) qca_power_shutdown(&qcadev->serdev_hu); - else + else if (qcadev->susclk) clk_disable_unprepare(qcadev->susclk); hci_uart_unregister_device(&qcadev->serdev_hu); diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index f702c85c81b6..6113fc0a52ae 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -1400,7 +1400,7 @@ static void sysc_init_revision_quirks(struct sysc *ddata) } /* 1-wire needs module's internal clocks enabled for reset */ -static void sysc_clk_enable_quirk_hdq1w(struct sysc *ddata) +static void sysc_pre_reset_quirk_hdq1w(struct sysc *ddata) { int offset = 0x0c; /* HDQ_CTRL_STATUS */ u16 val; @@ -1488,7 +1488,7 @@ static void sysc_init_module_quirks(struct sysc *ddata) return; if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_HDQ1W) { - ddata->clk_enable_quirk = sysc_clk_enable_quirk_hdq1w; + ddata->clk_disable_quirk = sysc_pre_reset_quirk_hdq1w; return; } diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 886b2638c730..c51292c2a131 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -519,7 +519,7 @@ static const struct block_device_operations gdrom_bdops = { .check_events = gdrom_bdops_check_events, .ioctl = gdrom_bdops_ioctl, #ifdef CONFIG_COMPAT - .ioctl = blkdev_compat_ptr_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, #endif }; diff --git a/drivers/char/ipmi/ipmi_si_platform.c b/drivers/char/ipmi/ipmi_si_platform.c index c78127ccbc0d..638c693e17ad 100644 --- a/drivers/char/ipmi/ipmi_si_platform.c +++ b/drivers/char/ipmi/ipmi_si_platform.c @@ -194,7 +194,7 @@ static int platform_ipmi_probe(struct platform_device *pdev) else io.slave_addr = slave_addr; - io.irq = platform_get_irq(pdev, 0); + io.irq = platform_get_irq_optional(pdev, 0); if (io.irq > 0) io.irq_setup = ipmi_std_irq_setup; else @@ -378,7 +378,7 @@ static int acpi_ipmi_probe(struct platform_device *pdev) io.irq = tmp; io.irq_setup = acpi_gpe_irq_setup; } else { - int irq = platform_get_irq(pdev, 0); + int irq = platform_get_irq_optional(pdev, 0); if (irq > 0) { io.irq = irq; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cbe6c94bf158..808874bccf4a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1076,9 +1076,17 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) pol = policy->last_policy; } else if (def_gov) { pol = cpufreq_parse_policy(def_gov->name); - } else { - return -ENODATA; + /* + * In case the default governor is neiter "performance" + * nor "powersave", fall back to the initial policy + * value set by the driver. + */ + if (pol == CPUFREQ_POLICY_UNKNOWN) + pol = policy->policy; } + if (pol != CPUFREQ_POLICY_PERFORMANCE && + pol != CPUFREQ_POLICY_POWERSAVE) + return -ENODATA; } return cpufreq_set_policy(policy, gov, pol); diff --git a/drivers/crypto/chelsio/chcr_ktls.c b/drivers/crypto/chelsio/chcr_ktls.c index f0c3834eda4f..00099e793e63 100644 --- a/drivers/crypto/chelsio/chcr_ktls.c +++ b/drivers/crypto/chelsio/chcr_ktls.c @@ -659,10 +659,9 @@ int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input) const struct cpl_set_tcb_rpl *p = (void *)input; struct chcr_ktls_info *tx_info = NULL; struct tid_info *t; - u32 tid, status; + u32 tid; tid = GET_TID(p); - status = p->status; t = &adap->tids; tx_info = lookup_tid(t, tid); diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index 5cf9b021220b..e1651adb9d06 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -902,14 +902,6 @@ static int chtls_skb_copy_to_page_nocache(struct sock *sk, return 0; } -/* Read TLS header to find content type and data length */ -static int tls_header_read(struct tls_hdr *thdr, struct iov_iter *from) -{ - if (copy_from_iter(thdr, sizeof(*thdr), from) != sizeof(*thdr)) - return -EFAULT; - return (__force int)cpu_to_be16(thdr->length); -} - static int csk_mem_free(struct chtls_dev *cdev, struct sock *sk) { return (cdev->max_host_sndbuf - sk->sk_wmem_queued); @@ -981,6 +973,37 @@ do_interrupted: goto do_rm_wq; } +static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg, + unsigned char *record_type) +{ + struct cmsghdr *cmsg; + int rc = -EINVAL; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + if (cmsg->cmsg_level != SOL_TLS) + continue; + + switch (cmsg->cmsg_type) { + case TLS_SET_RECORD_TYPE: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type))) + return -EINVAL; + + if (msg->msg_flags & MSG_MORE) + return -EINVAL; + + *record_type = *(unsigned char *)CMSG_DATA(cmsg); + rc = 0; + break; + default: + return -EINVAL; + } + } + + return rc; +} + int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); @@ -1022,15 +1045,21 @@ int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) goto wait_for_sndbuf; if (is_tls_tx(csk) && !csk->tlshws.txleft) { - struct tls_hdr hdr; + unsigned char record_type = TLS_RECORD_TYPE_DATA; - recordsz = tls_header_read(&hdr, &msg->msg_iter); - size -= TLS_HEADER_LENGTH; - copied += TLS_HEADER_LENGTH; + if (unlikely(msg->msg_controllen)) { + err = chtls_proccess_cmsg(sk, msg, + &record_type); + if (err) + goto out_err; + } + + recordsz = size; csk->tlshws.txleft = recordsz; - csk->tlshws.type = hdr.type; + csk->tlshws.type = record_type; + if (skb) - ULP_SKB_CB(skb)->ulp.tls.type = hdr.type; + ULP_SKB_CB(skb)->ulp.tls.type = record_type; } if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || @@ -1521,6 +1550,22 @@ found_ok_skb: } } } + /* Set record type if not already done. For a non-data record, + * do not proceed if record type could not be copied. + */ + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { + struct tls_hdr *thdr = (struct tls_hdr *)skb->data; + int cerr = 0; + + cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, + sizeof(thdr->type), &thdr->type); + + if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) + return -EIO; + /* don't send tls header, skip copy */ + goto skip_copy; + } + if (skb_copy_datagram_msg(skb, offset, msg, avail)) { if (!copied) { copied = -EFAULT; diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index cceee8bc3c2f..7dcf2093e531 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -738,7 +738,6 @@ struct devfreq *devfreq_add_device(struct device *dev, { struct devfreq *devfreq; struct devfreq_governor *governor; - static atomic_t devfreq_no = ATOMIC_INIT(-1); int err = 0; if (!dev || !profile || !governor_name) { @@ -800,8 +799,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev); atomic_set(&devfreq->suspend_count, 0); - dev_set_name(&devfreq->dev, "devfreq%d", - atomic_inc_return(&devfreq_no)); + dev_set_name(&devfreq->dev, "%s", dev_name(dev)); err = device_register(&devfreq->dev); if (err) { mutex_unlock(&devfreq->lock); diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index d4097856c86b..c343c7c10b4c 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -108,6 +108,7 @@ static int dma_buf_release(struct inode *inode, struct file *file) dma_resv_fini(dmabuf->resv); module_put(dmabuf->owner); + kfree(dmabuf->name); kfree(dmabuf); return 0; } diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index e51d836afcc7..1092d4ce723e 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1947,8 +1947,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) return; } - spin_lock(&cohc->lock); - /* * When we reach this point, at least one queue item * should have been moved over from cohc->queue to @@ -1969,8 +1967,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) if (coh901318_queue_start(cohc) == NULL) cohc->busy = 0; - spin_unlock(&cohc->lock); - /* * This tasklet will remove items from cohc->active * and thus terminates them. diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 1d7347825b95..df47be612ebb 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -204,6 +204,7 @@ static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq) minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL); if (minor < 0) { rc = minor; + kfree(dev); goto ida_err; } @@ -212,7 +213,6 @@ static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq) rc = device_register(dev); if (rc < 0) { dev_err(&idxd->pdev->dev, "device register failed\n"); - put_device(dev); goto dev_reg_err; } idxd_cdev->minor = minor; @@ -221,8 +221,8 @@ static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq) dev_reg_err: ida_simple_remove(&cdev_ctx->minor_ida, MINOR(dev->devt)); + put_device(dev); ida_err: - kfree(dev); idxd_cdev->dev = NULL; return rc; } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 6d907fe150aa..6ca6e520a2fa 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -124,6 +124,7 @@ static int idxd_config_bus_probe(struct device *dev) rc = idxd_device_config(idxd); if (rc < 0) { spin_unlock_irqrestore(&idxd->dev_lock, flags); + module_put(THIS_MODULE); dev_warn(dev, "Device config failed: %d\n", rc); return rc; } @@ -132,6 +133,7 @@ static int idxd_config_bus_probe(struct device *dev) rc = idxd_device_enable(idxd); if (rc < 0) { spin_unlock_irqrestore(&idxd->dev_lock, flags); + module_put(THIS_MODULE); dev_warn(dev, "Device enable failed: %d\n", rc); return rc; } @@ -142,6 +144,7 @@ static int idxd_config_bus_probe(struct device *dev) rc = idxd_register_dma_device(idxd); if (rc < 0) { spin_unlock_irqrestore(&idxd->dev_lock, flags); + module_put(THIS_MODULE); dev_dbg(dev, "Failed to register dmaengine device\n"); return rc; } @@ -516,7 +519,7 @@ static ssize_t group_tokens_reserved_store(struct device *dev, if (val > idxd->max_tokens) return -EINVAL; - if (val > idxd->nr_tokens) + if (val > idxd->nr_tokens + group->tokens_reserved) return -EINVAL; group->tokens_reserved = val; @@ -901,6 +904,20 @@ static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%u\n", wq->size); } +static int total_claimed_wq_size(struct idxd_device *idxd) +{ + int i; + int wq_size = 0; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = &idxd->wqs[i]; + + wq_size += wq->size; + } + + return wq_size; +} + static ssize_t wq_size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -920,7 +937,7 @@ static ssize_t wq_size_store(struct device *dev, if (wq->state != IDXD_WQ_DISABLED) return -EPERM; - if (size > idxd->max_wq_size) + if (size + total_claimed_wq_size(idxd) - wq->size > idxd->max_wq_size) return -EINVAL; wq->size = size; @@ -999,12 +1016,14 @@ static ssize_t wq_type_store(struct device *dev, return -EPERM; old_type = wq->type; - if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL])) + if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_NONE])) + wq->type = IDXD_WQT_NONE; + else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL])) wq->type = IDXD_WQT_KERNEL; else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_USER])) wq->type = IDXD_WQT_USER; else - wq->type = IDXD_WQT_NONE; + return -EINVAL; /* If we are changing queue type, clear the name */ if (wq->type != old_type) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 066b21a32232..4d4477df4ede 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1331,13 +1331,14 @@ static void sdma_free_chan_resources(struct dma_chan *chan) sdma_channel_synchronize(chan); - if (sdmac->event_id0) + if (sdmac->event_id0 >= 0) sdma_event_disable(sdmac, sdmac->event_id0); if (sdmac->event_id1) sdma_event_disable(sdmac, sdmac->event_id1); sdmac->event_id0 = 0; sdmac->event_id1 = 0; + sdmac->context_loaded = false; sdma_set_channel_priority(sdmac, 0); @@ -1631,7 +1632,7 @@ static int sdma_config(struct dma_chan *chan, memcpy(&sdmac->slave_config, dmaengine_cfg, sizeof(*dmaengine_cfg)); /* Set ENBLn earlier to make sure dma request triggered after that */ - if (sdmac->event_id0) { + if (sdmac->event_id0 >= 0) { if (sdmac->event_id0 >= sdmac->sdma->drvdata->num_events) return -EINVAL; sdma_event_enable(sdmac, sdmac->event_id0); diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 3a45079d11ec..4a750e29bfb5 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -281,7 +281,7 @@ static struct tegra_dma_desc *tegra_dma_desc_get( /* Do not allocate if desc are waiting for ack */ list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) { - if (async_tx_test_ack(&dma_desc->txd)) { + if (async_tx_test_ack(&dma_desc->txd) && !dma_desc->cb_count) { list_del(&dma_desc->node); spin_unlock_irqrestore(&tdc->lock, flags); dma_desc->txd.flags = 0; @@ -756,10 +756,6 @@ static int tegra_dma_terminate_all(struct dma_chan *dc) bool was_busy; spin_lock_irqsave(&tdc->lock, flags); - if (list_empty(&tdc->pending_sg_req)) { - spin_unlock_irqrestore(&tdc->lock, flags); - return 0; - } if (!tdc->busy) goto skip_dma_stop; diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index ea79c2df28e0..0536866a58ce 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -5,6 +5,7 @@ */ #include <linux/kernel.h> +#include <linux/delay.h> #include <linux/dmaengine.h> #include <linux/dma-mapping.h> #include <linux/dmapool.h> @@ -96,6 +97,24 @@ struct udma_match_data { u32 level_start_idx[]; }; +struct udma_hwdesc { + size_t cppi5_desc_size; + void *cppi5_desc_vaddr; + dma_addr_t cppi5_desc_paddr; + + /* TR descriptor internal pointers */ + void *tr_req_base; + struct cppi5_tr_resp_t *tr_resp_base; +}; + +struct udma_rx_flush { + struct udma_hwdesc hwdescs[2]; + + size_t buffer_size; + void *buffer_vaddr; + dma_addr_t buffer_paddr; +}; + struct udma_dev { struct dma_device ddev; struct device *dev; @@ -112,6 +131,8 @@ struct udma_dev { struct list_head desc_to_purge; spinlock_t lock; + struct udma_rx_flush rx_flush; + int tchan_cnt; int echan_cnt; int rchan_cnt; @@ -130,16 +151,6 @@ struct udma_dev { u32 psil_base; }; -struct udma_hwdesc { - size_t cppi5_desc_size; - void *cppi5_desc_vaddr; - dma_addr_t cppi5_desc_paddr; - - /* TR descriptor internal pointers */ - void *tr_req_base; - struct cppi5_tr_resp_t *tr_resp_base; -}; - struct udma_desc { struct virt_dma_desc vd; @@ -169,7 +180,7 @@ enum udma_chan_state { struct udma_tx_drain { struct delayed_work work; - unsigned long jiffie; + ktime_t tstamp; u32 residue; }; @@ -502,7 +513,7 @@ static bool udma_is_chan_paused(struct udma_chan *uc) { u32 val, pause_mask; - switch (uc->desc->dir) { + switch (uc->config.dir) { case DMA_DEV_TO_MEM: val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG); @@ -551,12 +562,17 @@ static void udma_sync_for_device(struct udma_chan *uc, int idx) } } +static inline dma_addr_t udma_get_rx_flush_hwdesc_paddr(struct udma_chan *uc) +{ + return uc->ud->rx_flush.hwdescs[uc->config.pkt_mode].cppi5_desc_paddr; +} + static int udma_push_to_ring(struct udma_chan *uc, int idx) { struct udma_desc *d = uc->desc; - struct k3_ring *ring = NULL; - int ret = -EINVAL; + dma_addr_t paddr; + int ret; switch (uc->config.dir) { case DMA_DEV_TO_MEM: @@ -567,21 +583,37 @@ static int udma_push_to_ring(struct udma_chan *uc, int idx) ring = uc->tchan->t_ring; break; default: - break; + return -EINVAL; } - if (ring) { - dma_addr_t desc_addr = udma_curr_cppi5_desc_paddr(d, idx); + /* RX flush packet: idx == -1 is only passed in case of DEV_TO_MEM */ + if (idx == -1) { + paddr = udma_get_rx_flush_hwdesc_paddr(uc); + } else { + paddr = udma_curr_cppi5_desc_paddr(d, idx); wmb(); /* Ensure that writes are not moved over this point */ udma_sync_for_device(uc, idx); - ret = k3_ringacc_ring_push(ring, &desc_addr); - uc->in_ring_cnt++; } + ret = k3_ringacc_ring_push(ring, &paddr); + if (!ret) + uc->in_ring_cnt++; + return ret; } +static bool udma_desc_is_rx_flush(struct udma_chan *uc, dma_addr_t addr) +{ + if (uc->config.dir != DMA_DEV_TO_MEM) + return false; + + if (addr == udma_get_rx_flush_hwdesc_paddr(uc)) + return true; + + return false; +} + static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr) { struct k3_ring *ring = NULL; @@ -610,6 +642,10 @@ static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr) if (cppi5_desc_is_tdcm(*addr)) return ret; + /* Check for flush descriptor */ + if (udma_desc_is_rx_flush(uc, *addr)) + return -ENOENT; + d = udma_udma_desc_from_paddr(uc, *addr); if (d) @@ -890,6 +926,9 @@ static int udma_stop(struct udma_chan *uc) switch (uc->config.dir) { case DMA_DEV_TO_MEM: + if (!uc->cyclic && !uc->desc) + udma_push_to_ring(uc, -1); + udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG, UDMA_PEER_RT_EN_ENABLE | UDMA_PEER_RT_EN_TEARDOWN); @@ -946,9 +985,10 @@ static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d) peer_bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG); bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_BCNT_REG); + /* Transfer is incomplete, store current residue and time stamp */ if (peer_bcnt < bcnt) { uc->tx_drain.residue = bcnt - peer_bcnt; - uc->tx_drain.jiffie = jiffies; + uc->tx_drain.tstamp = ktime_get(); return false; } @@ -961,35 +1001,59 @@ static void udma_check_tx_completion(struct work_struct *work) tx_drain.work.work); bool desc_done = true; u32 residue_diff; - unsigned long jiffie_diff, delay; + ktime_t time_diff; + unsigned long delay; + + while (1) { + if (uc->desc) { + /* Get previous residue and time stamp */ + residue_diff = uc->tx_drain.residue; + time_diff = uc->tx_drain.tstamp; + /* + * Get current residue and time stamp or see if + * transfer is complete + */ + desc_done = udma_is_desc_really_done(uc, uc->desc); + } - if (uc->desc) { - residue_diff = uc->tx_drain.residue; - jiffie_diff = uc->tx_drain.jiffie; - desc_done = udma_is_desc_really_done(uc, uc->desc); - } - - if (!desc_done) { - jiffie_diff = uc->tx_drain.jiffie - jiffie_diff; - residue_diff -= uc->tx_drain.residue; - if (residue_diff) { - /* Try to guess when we should check next time */ - residue_diff /= jiffie_diff; - delay = uc->tx_drain.residue / residue_diff / 3; - if (jiffies_to_msecs(delay) < 5) - delay = 0; - } else { - /* No progress, check again in 1 second */ - delay = HZ; + if (!desc_done) { + /* + * Find the time delta and residue delta w.r.t + * previous poll + */ + time_diff = ktime_sub(uc->tx_drain.tstamp, + time_diff) + 1; + residue_diff -= uc->tx_drain.residue; + if (residue_diff) { + /* + * Try to guess when we should check + * next time by calculating rate at + * which data is being drained at the + * peer device + */ + delay = (time_diff / residue_diff) * + uc->tx_drain.residue; + } else { + /* No progress, check again in 1 second */ + schedule_delayed_work(&uc->tx_drain.work, HZ); + break; + } + + usleep_range(ktime_to_us(delay), + ktime_to_us(delay) + 10); + continue; } - schedule_delayed_work(&uc->tx_drain.work, delay); - } else if (uc->desc) { - struct udma_desc *d = uc->desc; + if (uc->desc) { + struct udma_desc *d = uc->desc; - uc->bcnt += d->residue; - udma_start(uc); - vchan_cookie_complete(&d->vd); + uc->bcnt += d->residue; + udma_start(uc); + vchan_cookie_complete(&d->vd); + break; + } + + break; } } @@ -1033,29 +1097,27 @@ static irqreturn_t udma_ring_irq_handler(int irq, void *data) goto out; } - if (uc->cyclic) { - /* push the descriptor back to the ring */ - if (d == uc->desc) { + if (d == uc->desc) { + /* active descriptor */ + if (uc->cyclic) { udma_cyclic_packet_elapsed(uc); vchan_cyclic_callback(&d->vd); - } - } else { - bool desc_done = false; - - if (d == uc->desc) { - desc_done = udma_is_desc_really_done(uc, d); - - if (desc_done) { + } else { + if (udma_is_desc_really_done(uc, d)) { uc->bcnt += d->residue; udma_start(uc); + vchan_cookie_complete(&d->vd); } else { schedule_delayed_work(&uc->tx_drain.work, 0); } } - - if (desc_done) - vchan_cookie_complete(&d->vd); + } else { + /* + * terminated descriptor, mark the descriptor as + * completed to update the channel's cookie marker + */ + dma_cookie_complete(&d->vd.tx); } } out: @@ -1965,36 +2027,81 @@ static struct udma_desc *udma_alloc_tr_desc(struct udma_chan *uc, return d; } +/** + * udma_get_tr_counters - calculate TR counters for a given length + * @len: Length of the trasnfer + * @align_to: Preferred alignment + * @tr0_cnt0: First TR icnt0 + * @tr0_cnt1: First TR icnt1 + * @tr1_cnt0: Second (if used) TR icnt0 + * + * For len < SZ_64K only one TR is enough, tr1_cnt0 is not updated + * For len >= SZ_64K two TRs are used in a simple way: + * First TR: SZ_64K-alignment blocks (tr0_cnt0, tr0_cnt1) + * Second TR: the remaining length (tr1_cnt0) + * + * Returns the number of TRs the length needs (1 or 2) + * -EINVAL if the length can not be supported + */ +static int udma_get_tr_counters(size_t len, unsigned long align_to, + u16 *tr0_cnt0, u16 *tr0_cnt1, u16 *tr1_cnt0) +{ + if (len < SZ_64K) { + *tr0_cnt0 = len; + *tr0_cnt1 = 1; + + return 1; + } + + if (align_to > 3) + align_to = 3; + +realign: + *tr0_cnt0 = SZ_64K - BIT(align_to); + if (len / *tr0_cnt0 >= SZ_64K) { + if (align_to) { + align_to--; + goto realign; + } + return -EINVAL; + } + + *tr0_cnt1 = len / *tr0_cnt0; + *tr1_cnt0 = len % *tr0_cnt0; + + return 2; +} + static struct udma_desc * udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl, unsigned int sglen, enum dma_transfer_direction dir, unsigned long tx_flags, void *context) { - enum dma_slave_buswidth dev_width; struct scatterlist *sgent; struct udma_desc *d; - size_t tr_size; struct cppi5_tr_type1_t *tr_req = NULL; + u16 tr0_cnt0, tr0_cnt1, tr1_cnt0; unsigned int i; - u32 burst; + size_t tr_size; + int num_tr = 0; + int tr_idx = 0; - if (dir == DMA_DEV_TO_MEM) { - dev_width = uc->cfg.src_addr_width; - burst = uc->cfg.src_maxburst; - } else if (dir == DMA_MEM_TO_DEV) { - dev_width = uc->cfg.dst_addr_width; - burst = uc->cfg.dst_maxburst; - } else { - dev_err(uc->ud->dev, "%s: bad direction?\n", __func__); + if (!is_slave_direction(dir)) { + dev_err(uc->ud->dev, "Only slave cyclic is supported\n"); return NULL; } - if (!burst) - burst = 1; + /* estimate the number of TRs we will need */ + for_each_sg(sgl, sgent, sglen, i) { + if (sg_dma_len(sgent) < SZ_64K) + num_tr++; + else + num_tr += 2; + } /* Now allocate and setup the descriptor. */ tr_size = sizeof(struct cppi5_tr_type1_t); - d = udma_alloc_tr_desc(uc, tr_size, sglen, dir); + d = udma_alloc_tr_desc(uc, tr_size, num_tr, dir); if (!d) return NULL; @@ -2002,19 +2109,46 @@ udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl, tr_req = d->hwdesc[0].tr_req_base; for_each_sg(sgl, sgent, sglen, i) { - d->residue += sg_dma_len(sgent); + dma_addr_t sg_addr = sg_dma_address(sgent); + + num_tr = udma_get_tr_counters(sg_dma_len(sgent), __ffs(sg_addr), + &tr0_cnt0, &tr0_cnt1, &tr1_cnt0); + if (num_tr < 0) { + dev_err(uc->ud->dev, "size %u is not supported\n", + sg_dma_len(sgent)); + udma_free_hwdesc(uc, d); + kfree(d); + return NULL; + } cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); cppi5_tr_csf_set(&tr_req[i].flags, CPPI5_TR_CSF_SUPR_EVT); - tr_req[i].addr = sg_dma_address(sgent); - tr_req[i].icnt0 = burst * dev_width; - tr_req[i].dim1 = burst * dev_width; - tr_req[i].icnt1 = sg_dma_len(sgent) / tr_req[i].icnt0; + tr_req[tr_idx].addr = sg_addr; + tr_req[tr_idx].icnt0 = tr0_cnt0; + tr_req[tr_idx].icnt1 = tr0_cnt1; + tr_req[tr_idx].dim1 = tr0_cnt0; + tr_idx++; + + if (num_tr == 2) { + cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, + false, false, + CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + cppi5_tr_csf_set(&tr_req[tr_idx].flags, + CPPI5_TR_CSF_SUPR_EVT); + + tr_req[tr_idx].addr = sg_addr + tr0_cnt1 * tr0_cnt0; + tr_req[tr_idx].icnt0 = tr1_cnt0; + tr_req[tr_idx].icnt1 = 1; + tr_req[tr_idx].dim1 = tr1_cnt0; + tr_idx++; + } + + d->residue += sg_dma_len(sgent); } - cppi5_tr_csf_set(&tr_req[i - 1].flags, CPPI5_TR_CSF_EOP); + cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags, CPPI5_TR_CSF_EOP); return d; } @@ -2319,47 +2453,66 @@ udma_prep_dma_cyclic_tr(struct udma_chan *uc, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction dir, unsigned long flags) { - enum dma_slave_buswidth dev_width; struct udma_desc *d; - size_t tr_size; + size_t tr_size, period_addr; struct cppi5_tr_type1_t *tr_req; - unsigned int i; unsigned int periods = buf_len / period_len; - u32 burst; + u16 tr0_cnt0, tr0_cnt1, tr1_cnt0; + unsigned int i; + int num_tr; - if (dir == DMA_DEV_TO_MEM) { - dev_width = uc->cfg.src_addr_width; - burst = uc->cfg.src_maxburst; - } else if (dir == DMA_MEM_TO_DEV) { - dev_width = uc->cfg.dst_addr_width; - burst = uc->cfg.dst_maxburst; - } else { - dev_err(uc->ud->dev, "%s: bad direction?\n", __func__); + if (!is_slave_direction(dir)) { + dev_err(uc->ud->dev, "Only slave cyclic is supported\n"); return NULL; } - if (!burst) - burst = 1; + num_tr = udma_get_tr_counters(period_len, __ffs(buf_addr), &tr0_cnt0, + &tr0_cnt1, &tr1_cnt0); + if (num_tr < 0) { + dev_err(uc->ud->dev, "size %zu is not supported\n", + period_len); + return NULL; + } /* Now allocate and setup the descriptor. */ tr_size = sizeof(struct cppi5_tr_type1_t); - d = udma_alloc_tr_desc(uc, tr_size, periods, dir); + d = udma_alloc_tr_desc(uc, tr_size, periods * num_tr, dir); if (!d) return NULL; tr_req = d->hwdesc[0].tr_req_base; + period_addr = buf_addr; for (i = 0; i < periods; i++) { - cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false, - CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + int tr_idx = i * num_tr; - tr_req[i].addr = buf_addr + period_len * i; - tr_req[i].icnt0 = dev_width; - tr_req[i].icnt1 = period_len / dev_width; - tr_req[i].dim1 = dev_width; + cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, false, + false, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + + tr_req[tr_idx].addr = period_addr; + tr_req[tr_idx].icnt0 = tr0_cnt0; + tr_req[tr_idx].icnt1 = tr0_cnt1; + tr_req[tr_idx].dim1 = tr0_cnt0; + + if (num_tr == 2) { + cppi5_tr_csf_set(&tr_req[tr_idx].flags, + CPPI5_TR_CSF_SUPR_EVT); + tr_idx++; + + cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, + false, false, + CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + + tr_req[tr_idx].addr = period_addr + tr0_cnt1 * tr0_cnt0; + tr_req[tr_idx].icnt0 = tr1_cnt0; + tr_req[tr_idx].icnt1 = 1; + tr_req[tr_idx].dim1 = tr1_cnt0; + } if (!(flags & DMA_PREP_INTERRUPT)) - cppi5_tr_csf_set(&tr_req[i].flags, + cppi5_tr_csf_set(&tr_req[tr_idx].flags, CPPI5_TR_CSF_SUPR_EVT); + + period_addr += period_len; } return d; @@ -2517,29 +2670,12 @@ udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, return NULL; } - if (len < SZ_64K) { - num_tr = 1; - tr0_cnt0 = len; - tr0_cnt1 = 1; - } else { - unsigned long align_to = __ffs(src | dest); - - if (align_to > 3) - align_to = 3; - /* - * Keep simple: tr0: SZ_64K-alignment blocks, - * tr1: the remaining - */ - num_tr = 2; - tr0_cnt0 = (SZ_64K - BIT(align_to)); - if (len / tr0_cnt0 >= SZ_64K) { - dev_err(uc->ud->dev, "size %zu is not supported\n", - len); - return NULL; - } - - tr0_cnt1 = len / tr0_cnt0; - tr1_cnt0 = len % tr0_cnt0; + num_tr = udma_get_tr_counters(len, __ffs(src | dest), &tr0_cnt0, + &tr0_cnt1, &tr1_cnt0); + if (num_tr < 0) { + dev_err(uc->ud->dev, "size %zu is not supported\n", + len); + return NULL; } d = udma_alloc_tr_desc(uc, tr_size, num_tr, DMA_MEM_TO_MEM); @@ -2631,6 +2767,9 @@ static enum dma_status udma_tx_status(struct dma_chan *chan, ret = dma_cookie_status(chan, cookie, txstate); + if (!udma_is_chan_running(uc)) + ret = DMA_COMPLETE; + if (ret == DMA_IN_PROGRESS && udma_is_chan_paused(uc)) ret = DMA_PAUSED; @@ -2697,11 +2836,8 @@ static int udma_pause(struct dma_chan *chan) { struct udma_chan *uc = to_udma_chan(chan); - if (!uc->desc) - return -EINVAL; - /* pause the channel */ - switch (uc->desc->dir) { + switch (uc->config.dir) { case DMA_DEV_TO_MEM: udma_rchanrt_update_bits(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG, @@ -2730,11 +2866,8 @@ static int udma_resume(struct dma_chan *chan) { struct udma_chan *uc = to_udma_chan(chan); - if (!uc->desc) - return -EINVAL; - /* resume the channel */ - switch (uc->desc->dir) { + switch (uc->config.dir) { case DMA_DEV_TO_MEM: udma_rchanrt_update_bits(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG, @@ -3248,6 +3381,98 @@ static int udma_setup_resources(struct udma_dev *ud) return ch_count; } +static int udma_setup_rx_flush(struct udma_dev *ud) +{ + struct udma_rx_flush *rx_flush = &ud->rx_flush; + struct cppi5_desc_hdr_t *tr_desc; + struct cppi5_tr_type1_t *tr_req; + struct cppi5_host_desc_t *desc; + struct device *dev = ud->dev; + struct udma_hwdesc *hwdesc; + size_t tr_size; + + /* Allocate 1K buffer for discarded data on RX channel teardown */ + rx_flush->buffer_size = SZ_1K; + rx_flush->buffer_vaddr = devm_kzalloc(dev, rx_flush->buffer_size, + GFP_KERNEL); + if (!rx_flush->buffer_vaddr) + return -ENOMEM; + + rx_flush->buffer_paddr = dma_map_single(dev, rx_flush->buffer_vaddr, + rx_flush->buffer_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, rx_flush->buffer_paddr)) + return -ENOMEM; + + /* Set up descriptor to be used for TR mode */ + hwdesc = &rx_flush->hwdescs[0]; + tr_size = sizeof(struct cppi5_tr_type1_t); + hwdesc->cppi5_desc_size = cppi5_trdesc_calc_size(tr_size, 1); + hwdesc->cppi5_desc_size = ALIGN(hwdesc->cppi5_desc_size, + ud->desc_align); + + hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size, + GFP_KERNEL); + if (!hwdesc->cppi5_desc_vaddr) + return -ENOMEM; + + hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr, + hwdesc->cppi5_desc_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr)) + return -ENOMEM; + + /* Start of the TR req records */ + hwdesc->tr_req_base = hwdesc->cppi5_desc_vaddr + tr_size; + /* Start address of the TR response array */ + hwdesc->tr_resp_base = hwdesc->tr_req_base + tr_size; + + tr_desc = hwdesc->cppi5_desc_vaddr; + cppi5_trdesc_init(tr_desc, 1, tr_size, 0, 0); + cppi5_desc_set_pktids(tr_desc, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT); + cppi5_desc_set_retpolicy(tr_desc, 0, 0); + + tr_req = hwdesc->tr_req_base; + cppi5_tr_init(&tr_req->flags, CPPI5_TR_TYPE1, false, false, + CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + cppi5_tr_csf_set(&tr_req->flags, CPPI5_TR_CSF_SUPR_EVT); + + tr_req->addr = rx_flush->buffer_paddr; + tr_req->icnt0 = rx_flush->buffer_size; + tr_req->icnt1 = 1; + + /* Set up descriptor to be used for packet mode */ + hwdesc = &rx_flush->hwdescs[1]; + hwdesc->cppi5_desc_size = ALIGN(sizeof(struct cppi5_host_desc_t) + + CPPI5_INFO0_HDESC_EPIB_SIZE + + CPPI5_INFO0_HDESC_PSDATA_MAX_SIZE, + ud->desc_align); + + hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size, + GFP_KERNEL); + if (!hwdesc->cppi5_desc_vaddr) + return -ENOMEM; + + hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr, + hwdesc->cppi5_desc_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr)) + return -ENOMEM; + + desc = hwdesc->cppi5_desc_vaddr; + cppi5_hdesc_init(desc, 0, 0); + cppi5_desc_set_pktids(&desc->hdr, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT); + cppi5_desc_set_retpolicy(&desc->hdr, 0, 0); + + cppi5_hdesc_attach_buf(desc, + rx_flush->buffer_paddr, rx_flush->buffer_size, + rx_flush->buffer_paddr, rx_flush->buffer_size); + + dma_sync_single_for_device(dev, hwdesc->cppi5_desc_paddr, + hwdesc->cppi5_desc_size, DMA_TO_DEVICE); + return 0; +} + #define TI_UDMAC_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ @@ -3361,6 +3586,10 @@ static int udma_probe(struct platform_device *pdev) if (ud->desc_align < dma_get_cache_alignment()) ud->desc_align = dma_get_cache_alignment(); + ret = udma_setup_rx_flush(ud); + if (ret) + return ret; + for (i = 0; i < ud->tchan_cnt; i++) { struct udma_tchan *tchan = &ud->tchans[i]; diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index 2d263382d797..880ffd833718 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -479,20 +479,14 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p) pinf = &p->ceinfo; if (!priv->p_data->quirks) { snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "DDR ECC error type:%s Row %d Bank %d Col %d ", - "CE", pinf->row, pinf->bank, pinf->col); - snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "Bit Position: %d Data: 0x%08x\n", + "DDR ECC error type:%s Row %d Bank %d Col %d Bit Position: %d Data: 0x%08x", + "CE", pinf->row, pinf->bank, pinf->col, pinf->bitpos, pinf->data); } else { snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "DDR ECC error type:%s Row %d Bank %d Col %d ", - "CE", pinf->row, pinf->bank, pinf->col); - snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "BankGroup Number %d Block Number %d ", - pinf->bankgrpnr, pinf->blknr); - snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "Bit Position: %d Data: 0x%08x\n", + "DDR ECC error type:%s Row %d Bank %d Col %d BankGroup Number %d Block Number %d Bit Position: %d Data: 0x%08x", + "CE", pinf->row, pinf->bank, pinf->col, + pinf->bankgrpnr, pinf->blknr, pinf->bitpos, pinf->data); } @@ -509,10 +503,8 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p) "UE", pinf->row, pinf->bank, pinf->col); } else { snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "DDR ECC error type :%s Row %d Bank %d Col %d ", - "UE", pinf->row, pinf->bank, pinf->col); - snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "BankGroup Number %d Block Number %d", + "DDR ECC error type :%s Row %d Bank %d Col %d BankGroup Number %d Block Number %d", + "UE", pinf->row, pinf->bank, pinf->col, pinf->bankgrpnr, pinf->blknr); } diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 621220ab3d0e..21ea99f65113 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -552,7 +552,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, seed = early_memremap(efi.rng_seed, sizeof(*seed)); if (seed != NULL) { - size = seed->size; + size = READ_ONCE(seed->size); early_memunmap(seed, sizeof(*seed)); } else { pr_err("Could not map UEFI random seed!\n"); @@ -562,7 +562,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, sizeof(*seed) + size); if (seed != NULL) { pr_notice("seeding entropy pool\n"); - add_bootloader_randomness(seed->bits, seed->size); + add_bootloader_randomness(seed->bits, size); early_memunmap(seed, sizeof(*seed) + size); } else { pr_err("Could not map UEFI random seed!\n"); diff --git a/drivers/firmware/imx/imx-scu.c b/drivers/firmware/imx/imx-scu.c index 03b43b7a6d1d..f71eaa5bf52d 100644 --- a/drivers/firmware/imx/imx-scu.c +++ b/drivers/firmware/imx/imx-scu.c @@ -29,6 +29,7 @@ struct imx_sc_chan { struct mbox_client cl; struct mbox_chan *ch; int idx; + struct completion tx_done; }; struct imx_sc_ipc { @@ -100,6 +101,14 @@ int imx_scu_get_handle(struct imx_sc_ipc **ipc) } EXPORT_SYMBOL(imx_scu_get_handle); +/* Callback called when the word of a message is ack-ed, eg read by SCU */ +static void imx_scu_tx_done(struct mbox_client *cl, void *mssg, int r) +{ + struct imx_sc_chan *sc_chan = container_of(cl, struct imx_sc_chan, cl); + + complete(&sc_chan->tx_done); +} + static void imx_scu_rx_callback(struct mbox_client *c, void *msg) { struct imx_sc_chan *sc_chan = container_of(c, struct imx_sc_chan, cl); @@ -149,6 +158,19 @@ static int imx_scu_ipc_write(struct imx_sc_ipc *sc_ipc, void *msg) for (i = 0; i < hdr->size; i++) { sc_chan = &sc_ipc->chans[i % 4]; + + /* + * SCU requires that all messages words are written + * sequentially but linux MU driver implements multiple + * independent channels for each register so ordering between + * different channels must be ensured by SCU API interface. + * + * Wait for tx_done before every send to ensure that no + * queueing happens at the mailbox channel level. + */ + wait_for_completion(&sc_chan->tx_done); + reinit_completion(&sc_chan->tx_done); + ret = mbox_send_message(sc_chan->ch, &data[i]); if (ret < 0) return ret; @@ -247,6 +269,11 @@ static int imx_scu_probe(struct platform_device *pdev) cl->knows_txdone = true; cl->rx_callback = imx_scu_rx_callback; + /* Initial tx_done completion as "done" */ + cl->tx_done = imx_scu_tx_done; + init_completion(&sc_chan->tx_done); + complete(&sc_chan->tx_done); + sc_chan->sc_ipc = sc_ipc; sc_chan->idx = i % 4; sc_chan->ch = mbox_request_channel_byname(cl, chan_name); diff --git a/drivers/firmware/imx/misc.c b/drivers/firmware/imx/misc.c index 4b56a587dacd..d073cb3ce699 100644 --- a/drivers/firmware/imx/misc.c +++ b/drivers/firmware/imx/misc.c @@ -16,7 +16,7 @@ struct imx_sc_msg_req_misc_set_ctrl { u32 ctrl; u32 val; u16 resource; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_req_cpu_start { struct imx_sc_rpc_msg hdr; @@ -24,18 +24,18 @@ struct imx_sc_msg_req_cpu_start { u32 address_lo; u16 resource; u8 enable; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_req_misc_get_ctrl { struct imx_sc_rpc_msg hdr; u32 ctrl; u16 resource; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_resp_misc_get_ctrl { struct imx_sc_rpc_msg hdr; u32 val; -} __packed; +} __packed __aligned(4); /* * This function sets a miscellaneous control value. diff --git a/drivers/firmware/imx/scu-pd.c b/drivers/firmware/imx/scu-pd.c index b556612207e5..af3ae0087de4 100644 --- a/drivers/firmware/imx/scu-pd.c +++ b/drivers/firmware/imx/scu-pd.c @@ -61,7 +61,7 @@ struct imx_sc_msg_req_set_resource_power_mode { struct imx_sc_rpc_msg hdr; u16 resource; u8 mode; -} __packed; +} __packed __aligned(4); #define IMX_SCU_PD_NAME_SIZE 20 struct imx_sc_pm_domain { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 94e2fd758e01..42f4febe24c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1389,7 +1389,7 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, static struct drm_driver kms_driver = { .driver_features = - DRIVER_USE_AGP | DRIVER_ATOMIC | + DRIVER_ATOMIC | DRIVER_GEM | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index d3c27a3c43f6..7546da0cc70c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -195,6 +195,7 @@ struct amdgpu_gmc { uint32_t srbm_soft_reset; bool prt_warning; uint64_t stolen_size; + uint32_t sdpif_register; /* apertures */ u64 shared_aperture_start; u64 shared_aperture_end; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 22bbb36c768e..02702597ddeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -52,7 +52,7 @@ * 1. Primary ring * 2. Async ring */ -#define GFX10_NUM_GFX_RINGS 2 +#define GFX10_NUM_GFX_RINGS_NV1X 1 #define GFX10_MEC_HPD_SIZE 2048 #define F32_CE_PROGRAM_RAM_SIZE 65536 @@ -1304,7 +1304,7 @@ static int gfx_v10_0_sw_init(void *handle) case CHIP_NAVI14: case CHIP_NAVI12: adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 2; + adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; @@ -2710,18 +2710,20 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_commit(ring); /* submit cs packet to copy state 0 to next available state */ - ring = &adev->gfx.gfx_ring[1]; - r = amdgpu_ring_alloc(ring, 2); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); - return r; - } - - amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); - amdgpu_ring_write(ring, 0); + if (adev->gfx.num_gfx_rings > 1) { + /* maximum supported gfx ring is 2 */ + ring = &adev->gfx.gfx_ring[1]; + r = amdgpu_ring_alloc(ring, 2); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + return r; + } - amdgpu_ring_commit(ring); + amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_commit(ring); + } return 0; } @@ -2818,39 +2820,41 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); /* Init gfx ring 1 for pipe 1 */ - mutex_lock(&adev->srbm_mutex); - gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); - ring = &adev->gfx.gfx_ring[1]; - rb_bufsz = order_base_2(ring->ring_size / 8); - tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); - tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); - WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); - /* Initialize the ring buffer's write pointers */ - ring->wptr = 0; - WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); - /* Set the wb address wether it's enabled or not */ - rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); - WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & - CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); - wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, - lower_32_bits(wptr_gpu_addr)); - WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, - upper_32_bits(wptr_gpu_addr)); - - mdelay(1); - WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); - - rb_addr = ring->gpu_addr >> 8; - WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr); - WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr)); - WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); - - gfx_v10_0_cp_gfx_set_doorbell(adev, ring); - mutex_unlock(&adev->srbm_mutex); - + if (adev->gfx.num_gfx_rings > 1) { + mutex_lock(&adev->srbm_mutex); + gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); + /* maximum supported gfx ring is 2 */ + ring = &adev->gfx.gfx_ring[1]; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); + WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); + /* Initialize the ring buffer's write pointers */ + ring->wptr = 0; + WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); + /* Set the wb address wether it's enabled or not */ + rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); + WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & + CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + + mdelay(1); + WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); + + rb_addr = ring->gpu_addr >> 8; + WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr); + WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr)); + WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); + + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + mutex_unlock(&adev->srbm_mutex); + } /* Switch to pipe 0 */ mutex_lock(&adev->srbm_mutex); gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); @@ -3513,6 +3517,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) /* reset ring buffer */ ring->wptr = 0; + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); amdgpu_ring_clear_ring(ring); } else { amdgpu_ring_clear_ring(ring); @@ -3966,7 +3971,8 @@ static int gfx_v10_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS; + adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X; + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; gfx_v10_0_set_kiq_pm4_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3afdbbd6aaad..889154a78c4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3663,6 +3663,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) /* reset ring buffer */ ring->wptr = 0; + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); amdgpu_ring_clear_ring(ring); } else { amdgpu_ring_clear_ring(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 90216abf14a4..cc0c273a86f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1272,6 +1272,19 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) } /** + * gmc_v9_0_restore_registers - restores regs + * + * @adev: amdgpu_device pointer + * + * This restores register values, saved at suspend. + */ +static void gmc_v9_0_restore_registers(struct amdgpu_device *adev) +{ + if (adev->asic_type == CHIP_RAVEN) + WREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register); +} + +/** * gmc_v9_0_gart_enable - gart enable * * @adev: amdgpu_device pointer @@ -1377,6 +1390,20 @@ static int gmc_v9_0_hw_init(void *handle) } /** + * gmc_v9_0_save_registers - saves regs + * + * @adev: amdgpu_device pointer + * + * This saves potential register values that should be + * restored upon resume + */ +static void gmc_v9_0_save_registers(struct amdgpu_device *adev) +{ + if (adev->asic_type == CHIP_RAVEN) + adev->gmc.sdpif_register = RREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); +} + +/** * gmc_v9_0_gart_disable - gart disable * * @adev: amdgpu_device pointer @@ -1412,9 +1439,16 @@ static int gmc_v9_0_hw_fini(void *handle) static int gmc_v9_0_suspend(void *handle) { + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return gmc_v9_0_hw_fini(adev); + r = gmc_v9_0_hw_fini(adev); + if (r) + return r; + + gmc_v9_0_save_registers(adev); + + return 0; } static int gmc_v9_0_resume(void *handle) @@ -1422,6 +1456,7 @@ static int gmc_v9_0_resume(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gmc_v9_0_restore_registers(adev); r = gmc_v9_0_hw_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 2b488dfb2f21..d8945c31b622 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -89,6 +89,13 @@ #define HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK 0x00010000L #define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L #define mmHDP_MEM_POWER_CTRL_BASE_IDX 0 + +/* for Vega20/arcturus regiter offset change */ +#define mmROM_INDEX_VG20 0x00e4 +#define mmROM_INDEX_VG20_BASE_IDX 0 +#define mmROM_DATA_VG20 0x00e5 +#define mmROM_DATA_VG20_BASE_IDX 0 + /* * Indirect registers accessor */ @@ -309,6 +316,8 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, { u32 *dw_ptr; u32 i, length_dw; + uint32_t rom_index_offset; + uint32_t rom_data_offset; if (bios == NULL) return false; @@ -321,11 +330,23 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, dw_ptr = (u32 *)bios; length_dw = ALIGN(length_bytes, 4) / 4; + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX_VG20); + rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA_VG20); + break; + default: + rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX); + rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA); + break; + } + /* set rom index to 0 */ - WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0); + WREG32(rom_index_offset, 0); /* read out the rom data */ for (i = 0; i < length_dw; i++) - dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA)); + dw_ptr[i] = RREG32(rom_data_offset); return true; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e8f66fbf399e..e997251a8b57 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1422,6 +1422,73 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) drm_kms_helper_hotplug_event(dev); } +static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + int ret = 0; + + if (!is_support_sw_smu(adev)) + return 0; + + /* This interface is for dGPU Navi1x.Linux dc-pplib interface depends + * on window driver dc implementation. + * For Navi1x, clock settings of dcn watermarks are fixed. the settings + * should be passed to smu during boot up and resume from s3. + * boot up: dc calculate dcn watermark clock settings within dc_create, + * dcn20_resource_construct + * then call pplib functions below to pass the settings to smu: + * smu_set_watermarks_for_clock_ranges + * smu_set_watermarks_table + * navi10_set_watermarks_table + * smu_write_watermarks_table + * + * For Renoir, clock settings of dcn watermark are also fixed values. + * dc has implemented different flow for window driver: + * dc_hardware_init / dc_set_power_state + * dcn10_init_hw + * notify_wm_ranges + * set_wm_ranges + * -- Linux + * smu_set_watermarks_for_clock_ranges + * renoir_set_watermarks_table + * smu_write_watermarks_table + * + * For Linux, + * dc_hardware_init -> amdgpu_dm_init + * dc_set_power_state --> dm_resume + * + * therefore, this function apply to navi10/12/14 but not Renoir + * * + */ + switch(adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + break; + default: + return 0; + } + + mutex_lock(&smu->mutex); + + /* pass data to smu controller */ + if ((smu->watermarks_bitmap & WATERMARKS_EXIST) && + !(smu->watermarks_bitmap & WATERMARKS_LOADED)) { + ret = smu_write_watermarks_table(smu); + + if (ret) { + mutex_unlock(&smu->mutex); + DRM_ERROR("Failed to update WMTABLE!\n"); + return ret; + } + smu->watermarks_bitmap |= WATERMARKS_LOADED; + } + + mutex_unlock(&smu->mutex); + + return 0; +} + /** * dm_hw_init() - Initialize DC device * @handle: The base driver device containing the amdgpu_dm device. @@ -1700,6 +1767,8 @@ static int dm_resume(void *handle) amdgpu_dm_irq_resume_late(adev); + amdgpu_dm_smu_write_watermarks_table(adev); + return 0; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5672f7765919..da73161043d5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -451,6 +451,7 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, aconnector->dc_sink); dc_sink_release(aconnector->dc_sink); aconnector->dc_sink = NULL; + aconnector->dc_link->cur_link_settings.lane_count = 0; } drm_connector_unregister(connector); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index f36a0d8cedfe..446ba0a7a4b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -840,8 +840,8 @@ static void hubbub1_det_request_size( hubbub1_get_blk256_size(&blk256_width, &blk256_height, bpe); - swath_bytes_horz_wc = height * blk256_height * bpe; - swath_bytes_vert_wc = width * blk256_width * bpe; + swath_bytes_horz_wc = width * blk256_height * bpe; + swath_bytes_vert_wc = height * blk256_width * bpe; *req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ? false : /* full 256B request */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 85f90f3e24cb..e310d67c399a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -335,6 +335,117 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = { .use_urgent_burst_bw = 0 }; +struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 560.0, + .fabricclk_mhz = 560.0, + .dispclk_mhz = 513.0, + .dppclk_mhz = 513.0, + .phyclk_mhz = 540.0, + .socclk_mhz = 560.0, + .dscclk_mhz = 171.0, + .dram_speed_mts = 8960.0, + }, + { + .state = 1, + .dcfclk_mhz = 694.0, + .fabricclk_mhz = 694.0, + .dispclk_mhz = 642.0, + .dppclk_mhz = 642.0, + .phyclk_mhz = 600.0, + .socclk_mhz = 694.0, + .dscclk_mhz = 214.0, + .dram_speed_mts = 11104.0, + }, + { + .state = 2, + .dcfclk_mhz = 875.0, + .fabricclk_mhz = 875.0, + .dispclk_mhz = 734.0, + .dppclk_mhz = 734.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 875.0, + .dscclk_mhz = 245.0, + .dram_speed_mts = 14000.0, + }, + { + .state = 3, + .dcfclk_mhz = 1000.0, + .fabricclk_mhz = 1000.0, + .dispclk_mhz = 1100.0, + .dppclk_mhz = 1100.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1000.0, + .dscclk_mhz = 367.0, + .dram_speed_mts = 16000.0, + }, + { + .state = 4, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 16000.0, + }, + /*Extra state, no dispclk ramping*/ + { + .state = 5, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 16000.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 8.6, + .sr_enter_plus_exit_time_us = 10.9, + .urgent_latency_us = 4.0, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, + .max_avg_sdp_bw_use_normal_percent = 40.0, + .max_avg_dram_bw_use_normal_percent = 40.0, + .writeback_latency_us = 12.0, + .ideal_dram_bw_after_urgent_percent = 40.0, + .max_request_size_bytes = 256, + .dram_channel_width_bytes = 2, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .dcn_downspread_percent = 0.5, + .downspread_percent = 0.38, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 131, + .urgent_out_of_order_return_per_channel_bytes = 256, + .channel_interleave_bytes = 256, + .num_banks = 8, + .num_chans = 8, + .vmm_page_size_bytes = 4096, + .dram_clock_change_latency_us = 404.0, + .dummy_pstate_latency_us = 5.0, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, + .dispclk_dppclk_vco_speed_mhz = 3850, + .xfc_bus_transport_time_us = 20, + .xfc_xbuf_latency_tolerance_us = 4, + .use_urgent_burst_bw = 0 +}; + struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 }; #ifndef mmDP0_DP_DPHY_INTERNAL_CTRL @@ -3291,6 +3402,9 @@ void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb( uint32_t hw_internal_rev) { + if (ASICREV_IS_NAVI14_M(hw_internal_rev)) + return &dcn2_0_nv14_soc; + if (ASICREV_IS_NAVI12_P(hw_internal_rev)) return &dcn2_0_nv12_soc; diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h index b6f74bf4af02..27bb8c1ab858 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h @@ -7376,6 +7376,8 @@ #define mmCRTC4_CRTC_DRR_CONTROL 0x0f3e #define mmCRTC4_CRTC_DRR_CONTROL_BASE_IDX 2 +#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x395d +#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2 // addressBlock: dce_dc_fmt4_dispdec // base address: 0x2000 diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 99ad4ddbe12f..96e81c7bc266 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -222,7 +222,7 @@ int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, { int ret = 0; - if (min <= 0 && max <= 0) + if (min < 0 && max < 0) return -EINVAL; if (!smu_clk_dpm_is_enabled(smu, clk_type)) @@ -2006,8 +2006,11 @@ int smu_set_watermarks_for_clock_ranges(struct smu_context *smu, smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) && smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) { smu_set_watermarks_table(smu, table, clock_ranges); - smu->watermarks_bitmap |= WATERMARKS_EXIST; - smu->watermarks_bitmap &= ~WATERMARKS_LOADED; + + if (!(smu->watermarks_bitmap & WATERMARKS_EXIST)) { + smu->watermarks_bitmap |= WATERMARKS_EXIST; + smu->watermarks_bitmap &= ~WATERMARKS_LOADED; + } } mutex_unlock(&smu->mutex); diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 0d73a49166af..aed4d6e60907 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -1063,15 +1063,6 @@ static int navi10_display_config_changed(struct smu_context *smu) int ret = 0; if ((smu->watermarks_bitmap & WATERMARKS_EXIST) && - !(smu->watermarks_bitmap & WATERMARKS_LOADED)) { - ret = smu_write_watermarks_table(smu); - if (ret) - return ret; - - smu->watermarks_bitmap |= WATERMARKS_LOADED; - } - - if ((smu->watermarks_bitmap & WATERMARKS_EXIST) && smu_feature_is_supported(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) && smu_feature_is_supported(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) { ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, @@ -1493,6 +1484,7 @@ static int navi10_set_watermarks_table(struct smu_context *smu, *clock_ranges) { int i; + int ret = 0; Watermarks_t *table = watermarks; if (!table || !clock_ranges) @@ -1544,6 +1536,18 @@ static int navi10_set_watermarks_table(struct smu_context *smu, clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id; } + smu->watermarks_bitmap |= WATERMARKS_EXIST; + + /* pass data to smu controller */ + if (!(smu->watermarks_bitmap & WATERMARKS_LOADED)) { + ret = smu_write_watermarks_table(smu); + if (ret) { + pr_err("Failed to update WMTABLE!"); + return ret; + } + smu->watermarks_bitmap |= WATERMARKS_LOADED; + } + return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 861e6410363b..3ad0f4aa3aa3 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -111,8 +111,8 @@ static struct smu_12_0_cmn2aisc_mapping renoir_clk_map[SMU_CLK_COUNT] = { CLK_MAP(GFXCLK, CLOCK_GFXCLK), CLK_MAP(SCLK, CLOCK_GFXCLK), CLK_MAP(SOCCLK, CLOCK_SOCCLK), - CLK_MAP(UCLK, CLOCK_UMCCLK), - CLK_MAP(MCLK, CLOCK_UMCCLK), + CLK_MAP(UCLK, CLOCK_FCLK), + CLK_MAP(MCLK, CLOCK_FCLK), }; static struct smu_12_0_cmn2aisc_mapping renoir_table_map[SMU_TABLE_COUNT] = { @@ -280,7 +280,7 @@ static int renoir_print_clk_levels(struct smu_context *smu, break; case SMU_MCLK: count = NUM_MEMCLK_DPM_LEVELS; - cur_value = metrics.ClockFrequency[CLOCK_UMCCLK]; + cur_value = metrics.ClockFrequency[CLOCK_FCLK]; break; case SMU_DCEFCLK: count = NUM_DCFCLK_DPM_LEVELS; @@ -806,9 +806,10 @@ static int renoir_set_watermarks_table( clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id; } + smu->watermarks_bitmap |= WATERMARKS_EXIST; + /* pass data to smu controller */ - if ((smu->watermarks_bitmap & WATERMARKS_EXIST) && - !(smu->watermarks_bitmap & WATERMARKS_LOADED)) { + if (!(smu->watermarks_bitmap & WATERMARKS_LOADED)) { ret = smu_write_watermarks_table(smu); if (ret) { pr_err("Failed to update WMTABLE!"); diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index b06c057a9002..c9e5ce135fd4 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -978,8 +978,12 @@ int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu) struct smu_11_0_max_sustainable_clocks *max_sustainable_clocks; int ret = 0; - max_sustainable_clocks = kzalloc(sizeof(struct smu_11_0_max_sustainable_clocks), + if (!smu->smu_table.max_sustainable_clocks) + max_sustainable_clocks = kzalloc(sizeof(struct smu_11_0_max_sustainable_clocks), GFP_KERNEL); + else + max_sustainable_clocks = smu->smu_table.max_sustainable_clocks; + smu->smu_table.max_sustainable_clocks = (void *)max_sustainable_clocks; max_sustainable_clocks->uclock = smu->smu_table.boot_values.uclk / 100; diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c index 870e6db2907e..518e6597bf2d 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c @@ -458,9 +458,6 @@ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ { int ret = 0; - if (max < min) - return -EINVAL; - switch (clk_type) { case SMU_GFXCLK: case SMU_SCLK: diff --git a/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c b/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c index 56f55c53abfd..2dfa2fd2a23b 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c +++ b/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c @@ -210,8 +210,7 @@ static int anx6345_dp_link_training(struct anx6345 *anx6345) if (err) return err; - dpcd[0] = drm_dp_max_link_rate(anx6345->dpcd); - dpcd[0] = drm_dp_link_rate_to_bw_code(dpcd[0]); + dpcd[0] = dp_bw; err = regmap_write(anx6345->map[I2C_IDX_DPTX], SP_DP_MAIN_LINK_BW_SET_REG, dpcd[0]); if (err) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index cce0b1bba591..ed0fea2ac322 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1935,7 +1935,7 @@ static u8 drm_dp_calculate_rad(struct drm_dp_mst_port *port, return parent_lct + 1; } -static bool drm_dp_mst_is_dp_mst_end_device(u8 pdt, bool mcs) +static bool drm_dp_mst_is_end_device(u8 pdt, bool mcs) { switch (pdt) { case DP_PEER_DEVICE_DP_LEGACY_CONV: @@ -1965,13 +1965,13 @@ drm_dp_port_set_pdt(struct drm_dp_mst_port *port, u8 new_pdt, /* Teardown the old pdt, if there is one */ if (port->pdt != DP_PEER_DEVICE_NONE) { - if (drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) { + if (drm_dp_mst_is_end_device(port->pdt, port->mcs)) { /* * If the new PDT would also have an i2c bus, * don't bother with reregistering it */ if (new_pdt != DP_PEER_DEVICE_NONE && - drm_dp_mst_is_dp_mst_end_device(new_pdt, new_mcs)) { + drm_dp_mst_is_end_device(new_pdt, new_mcs)) { port->pdt = new_pdt; port->mcs = new_mcs; return 0; @@ -1991,7 +1991,7 @@ drm_dp_port_set_pdt(struct drm_dp_mst_port *port, u8 new_pdt, port->mcs = new_mcs; if (port->pdt != DP_PEER_DEVICE_NONE) { - if (drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) { + if (drm_dp_mst_is_end_device(port->pdt, port->mcs)) { /* add i2c over sideband */ ret = drm_dp_mst_register_i2c_bus(&port->aux); } else { @@ -2172,7 +2172,7 @@ drm_dp_mst_port_add_connector(struct drm_dp_mst_branch *mstb, } if (port->pdt != DP_PEER_DEVICE_NONE && - drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) { + drm_dp_mst_is_end_device(port->pdt, port->mcs)) { port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc); drm_connector_set_tile_property(port->connector); @@ -2302,14 +2302,18 @@ drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb, mutex_unlock(&mgr->lock); } - if (old_ddps != port->ddps) { - if (port->ddps) { - if (!port->input) { - drm_dp_send_enum_path_resources(mgr, mstb, - port); - } + /* + * Reprobe PBN caps on both hotplug, and when re-probing the link + * for our parent mstb + */ + if (old_ddps != port->ddps || !created) { + if (port->ddps && !port->input) { + ret = drm_dp_send_enum_path_resources(mgr, mstb, + port); + if (ret == 1) + changed = true; } else { - port->available_pbn = 0; + port->full_pbn = 0; } } @@ -2401,11 +2405,10 @@ drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb, port->ddps = conn_stat->displayport_device_plug_status; if (old_ddps != port->ddps) { - if (port->ddps) { - dowork = true; - } else { - port->available_pbn = 0; - } + if (port->ddps && !port->input) + drm_dp_send_enum_path_resources(mgr, mstb, port); + else + port->full_pbn = 0; } new_pdt = port->input ? DP_PEER_DEVICE_NONE : conn_stat->peer_device_type; @@ -2556,13 +2559,6 @@ static int drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *mg if (port->input || !port->ddps) continue; - if (!port->available_pbn) { - drm_modeset_lock(&mgr->base.lock, NULL); - drm_dp_send_enum_path_resources(mgr, mstb, port); - drm_modeset_unlock(&mgr->base.lock); - changed = true; - } - if (port->mstb) mstb_child = drm_dp_mst_topology_get_mstb_validated( mgr, port->mstb); @@ -2990,6 +2986,7 @@ drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr, ret = drm_dp_mst_wait_tx_reply(mstb, txmsg); if (ret > 0) { + ret = 0; path_res = &txmsg->reply.u.path_resources; if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK) { @@ -3002,14 +2999,22 @@ drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr, path_res->port_number, path_res->full_payload_bw_number, path_res->avail_payload_bw_number); - port->available_pbn = - path_res->avail_payload_bw_number; + + /* + * If something changed, make sure we send a + * hotplug + */ + if (port->full_pbn != path_res->full_payload_bw_number || + port->fec_capable != path_res->fec_capable) + ret = 1; + + port->full_pbn = path_res->full_payload_bw_number; port->fec_capable = path_res->fec_capable; } } kfree(txmsg); - return 0; + return ret; } static struct drm_dp_mst_port *drm_dp_get_last_connected_port_to_mstb(struct drm_dp_mst_branch *mstb) @@ -3596,13 +3601,9 @@ drm_dp_mst_topology_mgr_invalidate_mstb(struct drm_dp_mst_branch *mstb) /* The link address will need to be re-sent on resume */ mstb->link_address_sent = false; - list_for_each_entry(port, &mstb->ports, next) { - /* The PBN for each port will also need to be re-probed */ - port->available_pbn = 0; - + list_for_each_entry(port, &mstb->ports, next) if (port->mstb) drm_dp_mst_topology_mgr_invalidate_mstb(port->mstb); - } } /** @@ -4829,41 +4830,102 @@ static bool drm_dp_mst_port_downstream_of_branch(struct drm_dp_mst_port *port, return false; } -static inline -int drm_dp_mst_atomic_check_bw_limit(struct drm_dp_mst_branch *branch, - struct drm_dp_mst_topology_state *mst_state) +static int +drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port, + struct drm_dp_mst_topology_state *state); + +static int +drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb, + struct drm_dp_mst_topology_state *state) { - struct drm_dp_mst_port *port; struct drm_dp_vcpi_allocation *vcpi; - int pbn_limit = 0, pbn_used = 0; + struct drm_dp_mst_port *port; + int pbn_used = 0, ret; + bool found = false; - list_for_each_entry(port, &branch->ports, next) { - if (port->mstb) - if (drm_dp_mst_atomic_check_bw_limit(port->mstb, mst_state)) - return -ENOSPC; + /* Check that we have at least one port in our state that's downstream + * of this branch, otherwise we can skip this branch + */ + list_for_each_entry(vcpi, &state->vcpis, next) { + if (!vcpi->pbn || + !drm_dp_mst_port_downstream_of_branch(vcpi->port, mstb)) + continue; - if (port->available_pbn > 0) - pbn_limit = port->available_pbn; + found = true; + break; } - DRM_DEBUG_ATOMIC("[MST BRANCH:%p] branch has %d PBN available\n", - branch, pbn_limit); + if (!found) + return 0; - list_for_each_entry(vcpi, &mst_state->vcpis, next) { - if (!vcpi->pbn) - continue; + if (mstb->port_parent) + DRM_DEBUG_ATOMIC("[MSTB:%p] [MST PORT:%p] Checking bandwidth limits on [MSTB:%p]\n", + mstb->port_parent->parent, mstb->port_parent, + mstb); + else + DRM_DEBUG_ATOMIC("[MSTB:%p] Checking bandwidth limits\n", + mstb); + + list_for_each_entry(port, &mstb->ports, next) { + ret = drm_dp_mst_atomic_check_port_bw_limit(port, state); + if (ret < 0) + return ret; - if (drm_dp_mst_port_downstream_of_branch(vcpi->port, branch)) - pbn_used += vcpi->pbn; + pbn_used += ret; } - DRM_DEBUG_ATOMIC("[MST BRANCH:%p] branch used %d PBN\n", - branch, pbn_used); - if (pbn_used > pbn_limit) { - DRM_DEBUG_ATOMIC("[MST BRANCH:%p] No available bandwidth\n", - branch); + return pbn_used; +} + +static int +drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port, + struct drm_dp_mst_topology_state *state) +{ + struct drm_dp_vcpi_allocation *vcpi; + int pbn_used = 0; + + if (port->pdt == DP_PEER_DEVICE_NONE) + return 0; + + if (drm_dp_mst_is_end_device(port->pdt, port->mcs)) { + bool found = false; + + list_for_each_entry(vcpi, &state->vcpis, next) { + if (vcpi->port != port) + continue; + if (!vcpi->pbn) + return 0; + + found = true; + break; + } + if (!found) + return 0; + + /* This should never happen, as it means we tried to + * set a mode before querying the full_pbn + */ + if (WARN_ON(!port->full_pbn)) + return -EINVAL; + + pbn_used = vcpi->pbn; + } else { + pbn_used = drm_dp_mst_atomic_check_mstb_bw_limit(port->mstb, + state); + if (pbn_used <= 0) + return pbn_used; + } + + if (pbn_used > port->full_pbn) { + DRM_DEBUG_ATOMIC("[MSTB:%p] [MST PORT:%p] required PBN of %d exceeds port limit of %d\n", + port->parent, port, pbn_used, + port->full_pbn); return -ENOSPC; } - return 0; + + DRM_DEBUG_ATOMIC("[MSTB:%p] [MST PORT:%p] uses %d out of %d PBN\n", + port->parent, port, pbn_used, port->full_pbn); + + return pbn_used; } static inline int @@ -5061,9 +5123,15 @@ int drm_dp_mst_atomic_check(struct drm_atomic_state *state) ret = drm_dp_mst_atomic_check_vcpi_alloc_limit(mgr, mst_state); if (ret) break; - ret = drm_dp_mst_atomic_check_bw_limit(mgr->mst_primary, mst_state); - if (ret) + + mutex_lock(&mgr->lock); + ret = drm_dp_mst_atomic_check_mstb_bw_limit(mgr->mst_primary, + mst_state); + mutex_unlock(&mgr->lock); + if (ret < 0) break; + else + ret = 0; } return ret; diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index a421a2eed48a..df31e5782eed 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -254,11 +254,16 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem) if (ret) goto err_zero_use; - if (obj->import_attach) + if (obj->import_attach) { shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf); - else + } else { + pgprot_t prot = PAGE_KERNEL; + + if (!shmem->map_cached) + prot = pgprot_writecombine(prot); shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT, - VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + VM_MAP, prot); + } if (!shmem->vaddr) { DRM_DEBUG_KMS("Failed to vmap pages\n"); @@ -540,8 +545,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) } vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND; - vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); - vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + if (!shmem->map_cached) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); vma->vm_ops = &drm_gem_shmem_vm_ops; return 0; diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 8428ae12dfa5..1f79bc2a881e 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -55,6 +55,7 @@ static const char * const decon_clks_name[] = { struct decon_context { struct device *dev; struct drm_device *drm_dev; + void *dma_priv; struct exynos_drm_crtc *crtc; struct exynos_drm_plane planes[WINDOWS_NR]; struct exynos_drm_plane_config configs[WINDOWS_NR]; @@ -644,7 +645,7 @@ static int decon_bind(struct device *dev, struct device *master, void *data) decon_clear_channels(ctx->crtc); - return exynos_drm_register_dma(drm_dev, dev); + return exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv); } static void decon_unbind(struct device *dev, struct device *master, void *data) @@ -654,7 +655,7 @@ static void decon_unbind(struct device *dev, struct device *master, void *data) decon_atomic_disable(ctx->crtc); /* detach this sub driver from iommu mapping if supported. */ - exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev); + exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev, &ctx->dma_priv); } static const struct component_ops decon_component_ops = { diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index ff59c641fa80..1eed3327999f 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -40,6 +40,7 @@ struct decon_context { struct device *dev; struct drm_device *drm_dev; + void *dma_priv; struct exynos_drm_crtc *crtc; struct exynos_drm_plane planes[WINDOWS_NR]; struct exynos_drm_plane_config configs[WINDOWS_NR]; @@ -127,13 +128,13 @@ static int decon_ctx_initialize(struct decon_context *ctx, decon_clear_channels(ctx->crtc); - return exynos_drm_register_dma(drm_dev, ctx->dev); + return exynos_drm_register_dma(drm_dev, ctx->dev, &ctx->dma_priv); } static void decon_ctx_remove(struct decon_context *ctx) { /* detach this sub driver from iommu mapping if supported. */ - exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev); + exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev, &ctx->dma_priv); } static u32 decon_calc_clkdiv(struct decon_context *ctx, diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index 9ebc02768847..619f81435c1b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -58,7 +58,7 @@ static inline void clear_dma_max_seg_size(struct device *dev) * mapping. */ static int drm_iommu_attach_device(struct drm_device *drm_dev, - struct device *subdrv_dev) + struct device *subdrv_dev, void **dma_priv) { struct exynos_drm_private *priv = drm_dev->dev_private; int ret; @@ -74,7 +74,14 @@ static int drm_iommu_attach_device(struct drm_device *drm_dev, return ret; if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) { - if (to_dma_iommu_mapping(subdrv_dev)) + /* + * Keep the original DMA mapping of the sub-device and + * restore it on Exynos DRM detach, otherwise the DMA + * framework considers it as IOMMU-less during the next + * probe (in case of deferred probe or modular build) + */ + *dma_priv = to_dma_iommu_mapping(subdrv_dev); + if (*dma_priv) arm_iommu_detach_device(subdrv_dev); ret = arm_iommu_attach_device(subdrv_dev, priv->mapping); @@ -98,19 +105,21 @@ static int drm_iommu_attach_device(struct drm_device *drm_dev, * mapping */ static void drm_iommu_detach_device(struct drm_device *drm_dev, - struct device *subdrv_dev) + struct device *subdrv_dev, void **dma_priv) { struct exynos_drm_private *priv = drm_dev->dev_private; - if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) + if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) { arm_iommu_detach_device(subdrv_dev); - else if (IS_ENABLED(CONFIG_IOMMU_DMA)) + arm_iommu_attach_device(subdrv_dev, *dma_priv); + } else if (IS_ENABLED(CONFIG_IOMMU_DMA)) iommu_detach_device(priv->mapping, subdrv_dev); clear_dma_max_seg_size(subdrv_dev); } -int exynos_drm_register_dma(struct drm_device *drm, struct device *dev) +int exynos_drm_register_dma(struct drm_device *drm, struct device *dev, + void **dma_priv) { struct exynos_drm_private *priv = drm->dev_private; @@ -137,13 +146,14 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev) priv->mapping = mapping; } - return drm_iommu_attach_device(drm, dev); + return drm_iommu_attach_device(drm, dev, dma_priv); } -void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev) +void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev, + void **dma_priv) { if (IS_ENABLED(CONFIG_EXYNOS_IOMMU)) - drm_iommu_detach_device(drm, dev); + drm_iommu_detach_device(drm, dev, dma_priv); } void exynos_drm_cleanup_dma(struct drm_device *drm) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index d4d21d8cfb90..6ae9056e7a18 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -223,8 +223,10 @@ static inline bool is_drm_iommu_supported(struct drm_device *drm_dev) return priv->mapping ? true : false; } -int exynos_drm_register_dma(struct drm_device *drm, struct device *dev); -void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev); +int exynos_drm_register_dma(struct drm_device *drm, struct device *dev, + void **dma_priv); +void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev, + void **dma_priv); void exynos_drm_cleanup_dma(struct drm_device *drm); #ifdef CONFIG_DRM_EXYNOS_DPI diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 33628d85edad..a85365c56d4d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1773,8 +1773,9 @@ static int exynos_dsi_probe(struct platform_device *pdev) ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret) { - dev_info(dev, "failed to get regulators: %d\n", ret); - return -EPROBE_DEFER; + if (ret != -EPROBE_DEFER) + dev_info(dev, "failed to get regulators: %d\n", ret); + return ret; } dsi->clks = devm_kcalloc(dev, @@ -1787,9 +1788,10 @@ static int exynos_dsi_probe(struct platform_device *pdev) dsi->clks[i] = devm_clk_get(dev, clk_names[i]); if (IS_ERR(dsi->clks[i])) { if (strcmp(clk_names[i], "sclk_mipi") == 0) { - strcpy(clk_names[i], OLD_SCLK_MIPI_CLK_NAME); - i--; - continue; + dsi->clks[i] = devm_clk_get(dev, + OLD_SCLK_MIPI_CLK_NAME); + if (!IS_ERR(dsi->clks[i])) + continue; } dev_info(dev, "failed to get the clock: %s\n", diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 8ea2e1d77802..29ab8be8604c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -97,6 +97,7 @@ struct fimc_scaler { struct fimc_context { struct exynos_drm_ipp ipp; struct drm_device *drm_dev; + void *dma_priv; struct device *dev; struct exynos_drm_ipp_task *task; struct exynos_drm_ipp_formats *formats; @@ -1133,7 +1134,7 @@ static int fimc_bind(struct device *dev, struct device *master, void *data) ctx->drm_dev = drm_dev; ipp->drm_dev = drm_dev; - exynos_drm_register_dma(drm_dev, dev); + exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv); exynos_drm_ipp_register(dev, ipp, &ipp_funcs, DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | @@ -1153,7 +1154,7 @@ static void fimc_unbind(struct device *dev, struct device *master, struct exynos_drm_ipp *ipp = &ctx->ipp; exynos_drm_ipp_unregister(dev, ipp); - exynos_drm_unregister_dma(drm_dev, dev); + exynos_drm_unregister_dma(drm_dev, dev, &ctx->dma_priv); } static const struct component_ops fimc_component_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 21aec38702fc..bb67cad8371f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -167,6 +167,7 @@ static struct fimd_driver_data exynos5420_fimd_driver_data = { struct fimd_context { struct device *dev; struct drm_device *drm_dev; + void *dma_priv; struct exynos_drm_crtc *crtc; struct exynos_drm_plane planes[WINDOWS_NR]; struct exynos_drm_plane_config configs[WINDOWS_NR]; @@ -1090,7 +1091,7 @@ static int fimd_bind(struct device *dev, struct device *master, void *data) if (is_drm_iommu_supported(drm_dev)) fimd_clear_channels(ctx->crtc); - return exynos_drm_register_dma(drm_dev, dev); + return exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv); } static void fimd_unbind(struct device *dev, struct device *master, @@ -1100,7 +1101,7 @@ static void fimd_unbind(struct device *dev, struct device *master, fimd_atomic_disable(ctx->crtc); - exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev); + exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev, &ctx->dma_priv); if (ctx->encoder) exynos_dpi_remove(ctx->encoder); diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 2a3382d43bc9..fcee33a43aca 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -232,6 +232,7 @@ struct g2d_runqueue_node { struct g2d_data { struct device *dev; + void *dma_priv; struct clk *gate_clk; void __iomem *regs; int irq; @@ -1409,7 +1410,7 @@ static int g2d_bind(struct device *dev, struct device *master, void *data) return ret; } - ret = exynos_drm_register_dma(drm_dev, dev); + ret = exynos_drm_register_dma(drm_dev, dev, &g2d->dma_priv); if (ret < 0) { dev_err(dev, "failed to enable iommu.\n"); g2d_fini_cmdlist(g2d); @@ -1434,7 +1435,7 @@ static void g2d_unbind(struct device *dev, struct device *master, void *data) priv->g2d_dev = NULL; cancel_work_sync(&g2d->runqueue_work); - exynos_drm_unregister_dma(g2d->drm_dev, dev); + exynos_drm_unregister_dma(g2d->drm_dev, dev, &g2d->dma_priv); } static const struct component_ops g2d_component_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 88b6fcaa20be..45e9aee8366a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -97,6 +97,7 @@ struct gsc_scaler { struct gsc_context { struct exynos_drm_ipp ipp; struct drm_device *drm_dev; + void *dma_priv; struct device *dev; struct exynos_drm_ipp_task *task; struct exynos_drm_ipp_formats *formats; @@ -1169,7 +1170,7 @@ static int gsc_bind(struct device *dev, struct device *master, void *data) ctx->drm_dev = drm_dev; ctx->drm_dev = drm_dev; - exynos_drm_register_dma(drm_dev, dev); + exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv); exynos_drm_ipp_register(dev, ipp, &ipp_funcs, DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | @@ -1189,7 +1190,7 @@ static void gsc_unbind(struct device *dev, struct device *master, struct exynos_drm_ipp *ipp = &ctx->ipp; exynos_drm_ipp_unregister(dev, ipp); - exynos_drm_unregister_dma(drm_dev, dev); + exynos_drm_unregister_dma(drm_dev, dev, &ctx->dma_priv); } static const struct component_ops gsc_component_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index b98482990d1a..dafa87b82052 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -56,6 +56,7 @@ struct rot_variant { struct rot_context { struct exynos_drm_ipp ipp; struct drm_device *drm_dev; + void *dma_priv; struct device *dev; void __iomem *regs; struct clk *clock; @@ -243,7 +244,7 @@ static int rotator_bind(struct device *dev, struct device *master, void *data) rot->drm_dev = drm_dev; ipp->drm_dev = drm_dev; - exynos_drm_register_dma(drm_dev, dev); + exynos_drm_register_dma(drm_dev, dev, &rot->dma_priv); exynos_drm_ipp_register(dev, ipp, &ipp_funcs, DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE, @@ -261,7 +262,7 @@ static void rotator_unbind(struct device *dev, struct device *master, struct exynos_drm_ipp *ipp = &rot->ipp; exynos_drm_ipp_unregister(dev, ipp); - exynos_drm_unregister_dma(rot->drm_dev, rot->dev); + exynos_drm_unregister_dma(rot->drm_dev, rot->dev, &rot->dma_priv); } static const struct component_ops rotator_component_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index 497973e9b2c5..93c43c8d914e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -39,6 +39,7 @@ struct scaler_data { struct scaler_context { struct exynos_drm_ipp ipp; struct drm_device *drm_dev; + void *dma_priv; struct device *dev; void __iomem *regs; struct clk *clock[SCALER_MAX_CLK]; @@ -450,7 +451,7 @@ static int scaler_bind(struct device *dev, struct device *master, void *data) scaler->drm_dev = drm_dev; ipp->drm_dev = drm_dev; - exynos_drm_register_dma(drm_dev, dev); + exynos_drm_register_dma(drm_dev, dev, &scaler->dma_priv); exynos_drm_ipp_register(dev, ipp, &ipp_funcs, DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | @@ -470,7 +471,8 @@ static void scaler_unbind(struct device *dev, struct device *master, struct exynos_drm_ipp *ipp = &scaler->ipp; exynos_drm_ipp_unregister(dev, ipp); - exynos_drm_unregister_dma(scaler->drm_dev, scaler->dev); + exynos_drm_unregister_dma(scaler->drm_dev, scaler->dev, + &scaler->dma_priv); } static const struct component_ops scaler_component_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 9ff921f43a93..f141916eade6 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1805,18 +1805,10 @@ static int hdmi_resources_init(struct hdmi_context *hdata) hdata->reg_hdmi_en = devm_regulator_get_optional(dev, "hdmi-en"); - if (PTR_ERR(hdata->reg_hdmi_en) != -ENODEV) { + if (PTR_ERR(hdata->reg_hdmi_en) != -ENODEV) if (IS_ERR(hdata->reg_hdmi_en)) return PTR_ERR(hdata->reg_hdmi_en); - ret = regulator_enable(hdata->reg_hdmi_en); - if (ret) { - DRM_DEV_ERROR(dev, - "failed to enable hdmi-en regulator\n"); - return ret; - } - } - return hdmi_bridge_init(hdata); } @@ -2023,6 +2015,15 @@ static int hdmi_probe(struct platform_device *pdev) } } + if (!IS_ERR(hdata->reg_hdmi_en)) { + ret = regulator_enable(hdata->reg_hdmi_en); + if (ret) { + DRM_DEV_ERROR(dev, + "failed to enable hdmi-en regulator\n"); + goto err_hdmiphy; + } + } + pm_runtime_enable(dev); audio_infoframe = &hdata->audio.infoframe; @@ -2047,7 +2048,8 @@ err_unregister_audio: err_rpm_disable: pm_runtime_disable(dev); - + if (!IS_ERR(hdata->reg_hdmi_en)) + regulator_disable(hdata->reg_hdmi_en); err_hdmiphy: if (hdata->hdmiphy_port) put_device(&hdata->hdmiphy_port->dev); diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 38ae9c32feef..21b726baedea 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -94,6 +94,7 @@ struct mixer_context { struct platform_device *pdev; struct device *dev; struct drm_device *drm_dev; + void *dma_priv; struct exynos_drm_crtc *crtc; struct exynos_drm_plane planes[MIXER_WIN_NR]; unsigned long flags; @@ -894,12 +895,14 @@ static int mixer_initialize(struct mixer_context *mixer_ctx, } } - return exynos_drm_register_dma(drm_dev, mixer_ctx->dev); + return exynos_drm_register_dma(drm_dev, mixer_ctx->dev, + &mixer_ctx->dma_priv); } static void mixer_ctx_remove(struct mixer_context *mixer_ctx) { - exynos_drm_unregister_dma(mixer_ctx->drm_dev, mixer_ctx->dev); + exynos_drm_unregister_dma(mixer_ctx->drm_dev, mixer_ctx->dev, + &mixer_ctx->dma_priv); } static int mixer_enable_vblank(struct exynos_drm_crtc *crtc) diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h b/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h index 0da860200410..e2ac09894a6d 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h @@ -83,7 +83,6 @@ #define VSIZE_OFST 20 #define LDI_INT_EN 0x741C #define FRAME_END_INT_EN_OFST 1 -#define UNDERFLOW_INT_EN_OFST 2 #define LDI_CTRL 0x7420 #define BPP_OFST 3 #define DATA_GATE_EN BIT(2) diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c index 73cd28a6ea07..86000127d4ee 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c @@ -46,7 +46,6 @@ struct ade_hw_ctx { struct clk *media_noc_clk; struct clk *ade_pix_clk; struct reset_control *reset; - struct work_struct display_reset_wq; bool power_on; int irq; @@ -136,7 +135,6 @@ static void ade_init(struct ade_hw_ctx *ctx) */ ade_update_bits(base + ADE_CTRL, FRM_END_START_OFST, FRM_END_START_MASK, REG_EFFECTIVE_IN_ADEEN_FRMEND); - ade_update_bits(base + LDI_INT_EN, UNDERFLOW_INT_EN_OFST, MASK(1), 1); } static bool ade_crtc_mode_fixup(struct drm_crtc *crtc, @@ -304,17 +302,6 @@ static void ade_crtc_disable_vblank(struct drm_crtc *crtc) MASK(1), 0); } -static void drm_underflow_wq(struct work_struct *work) -{ - struct ade_hw_ctx *ctx = container_of(work, struct ade_hw_ctx, - display_reset_wq); - struct drm_device *drm_dev = ctx->crtc->dev; - struct drm_atomic_state *state; - - state = drm_atomic_helper_suspend(drm_dev); - drm_atomic_helper_resume(drm_dev, state); -} - static irqreturn_t ade_irq_handler(int irq, void *data) { struct ade_hw_ctx *ctx = data; @@ -331,12 +318,6 @@ static irqreturn_t ade_irq_handler(int irq, void *data) MASK(1), 1); drm_crtc_handle_vblank(crtc); } - if (status & BIT(UNDERFLOW_INT_EN_OFST)) { - ade_update_bits(base + LDI_INT_CLR, UNDERFLOW_INT_EN_OFST, - MASK(1), 1); - DRM_ERROR("LDI underflow!"); - schedule_work(&ctx->display_reset_wq); - } return IRQ_HANDLED; } @@ -919,7 +900,6 @@ static void *ade_hw_ctx_alloc(struct platform_device *pdev, if (ret) return ERR_PTR(-EIO); - INIT_WORK(&ctx->display_reset_wq, drm_underflow_wq); ctx->crtc = crtc; return ctx; diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index b8c5f8934dbd..a1f2411aa21b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -294,7 +294,7 @@ extra-$(CONFIG_DRM_I915_WERROR) += \ $(shell cd $(srctree)/$(src) && find * -name '*.h'))) quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) - cmd_hdrtest = $(CC) $(c_flags) -S -o /dev/null -x c /dev/null -include $<; touch $@ + cmd_hdrtest = $(CC) $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@ $(obj)/%.hdrtest: $(src)/%.h FORCE $(call if_changed_dep,hdrtest) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 21561acfa3ac..46c40db992dd 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4466,13 +4466,19 @@ static void icl_dbuf_disable(struct drm_i915_private *dev_priv) static void icl_mbus_init(struct drm_i915_private *dev_priv) { - u32 val; + u32 mask, val; - val = MBUS_ABOX_BT_CREDIT_POOL1(16) | - MBUS_ABOX_BT_CREDIT_POOL2(16) | - MBUS_ABOX_B_CREDIT(1) | - MBUS_ABOX_BW_CREDIT(1); + mask = MBUS_ABOX_BT_CREDIT_POOL1_MASK | + MBUS_ABOX_BT_CREDIT_POOL2_MASK | + MBUS_ABOX_B_CREDIT_MASK | + MBUS_ABOX_BW_CREDIT_MASK; + val = I915_READ(MBUS_ABOX_CTL); + val &= ~mask; + val |= MBUS_ABOX_BT_CREDIT_POOL1(16) | + MBUS_ABOX_BT_CREDIT_POOL2(16) | + MBUS_ABOX_B_CREDIT(1) | + MBUS_ABOX_BW_CREDIT(1); I915_WRITE(MBUS_ABOX_CTL, val); } @@ -4968,8 +4974,21 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) I915_WRITE(BW_BUDDY1_CTL, BW_BUDDY_DISABLE); I915_WRITE(BW_BUDDY2_CTL, BW_BUDDY_DISABLE); } else { + u32 val; + I915_WRITE(BW_BUDDY1_PAGE_MASK, table[i].page_mask); I915_WRITE(BW_BUDDY2_PAGE_MASK, table[i].page_mask); + + /* Wa_22010178259:tgl */ + val = I915_READ(BW_BUDDY1_CTL); + val &= ~BW_BUDDY_TLB_REQ_TIMER_MASK; + val |= REG_FIELD_PREP(BW_BUDDY_TLB_REQ_TIMER_MASK, 0x8); + I915_WRITE(BW_BUDDY1_CTL, val); + + val = I915_READ(BW_BUDDY2_CTL); + val &= ~BW_BUDDY_TLB_REQ_TIMER_MASK; + val |= REG_FIELD_PREP(BW_BUDDY_TLB_REQ_TIMER_MASK, 0x8); + I915_WRITE(BW_BUDDY2_CTL, val); } } diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 89c9cf5f38d2..83025052c965 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -852,10 +852,12 @@ void intel_psr_enable(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - if (!crtc_state->has_psr) + if (!CAN_PSR(dev_priv) || dev_priv->psr.dp != intel_dp) return; - if (WARN_ON(!CAN_PSR(dev_priv))) + dev_priv->psr.force_mode_changed = false; + + if (!crtc_state->has_psr) return; WARN_ON(dev_priv->drrs.dp); @@ -1009,6 +1011,8 @@ void intel_psr_update(struct intel_dp *intel_dp, if (!CAN_PSR(dev_priv) || READ_ONCE(psr->dp) != intel_dp) return; + dev_priv->psr.force_mode_changed = false; + mutex_lock(&dev_priv->psr.lock); enable = crtc_state->has_psr && psr_global_enabled(psr->debug); @@ -1534,7 +1538,7 @@ void intel_psr_atomic_check(struct drm_connector *connector, struct drm_crtc_state *crtc_state; if (!CAN_PSR(dev_priv) || !new_state->crtc || - dev_priv->psr.initially_probed) + !dev_priv->psr.force_mode_changed) return; intel_connector = to_intel_connector(connector); @@ -1545,5 +1549,18 @@ void intel_psr_atomic_check(struct drm_connector *connector, crtc_state = drm_atomic_get_new_crtc_state(new_state->state, new_state->crtc); crtc_state->mode_changed = true; - dev_priv->psr.initially_probed = true; +} + +void intel_psr_set_force_mode_changed(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv; + + if (!intel_dp) + return; + + dev_priv = dp_to_i915(intel_dp); + if (!CAN_PSR(dev_priv) || intel_dp != dev_priv->psr.dp) + return; + + dev_priv->psr.force_mode_changed = true; } diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index c58a1d438808..274fc6bb6221 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -40,5 +40,6 @@ bool intel_psr_enabled(struct intel_dp *intel_dp); void intel_psr_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, struct drm_connector_state *new_state); +void intel_psr_set_force_mode_changed(struct intel_dp *intel_dp); #endif /* __INTEL_PSR_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 60c984e10c4a..7643a30ba4cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -423,7 +423,8 @@ eb_validate_vma(struct i915_execbuffer *eb, if (unlikely(entry->flags & eb->invalid_flags)) return -EINVAL; - if (unlikely(entry->alignment && !is_power_of_2(entry->alignment))) + if (unlikely(entry->alignment && + !is_power_of_2_u64(entry->alignment))) return -EINVAL; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 35985218bd85..5da9f9e534b9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -225,6 +225,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, /* But keep the pointer alive for RCU-protected lookups */ call_rcu(&obj->rcu, __i915_gem_free_object_rcu); + cond_resched(); } intel_runtime_pm_put(&i915->runtime_pm, wakeref); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index f7e4b39c734f..59b387ade49c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -256,8 +256,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) with_intel_runtime_pm(&i915->runtime_pm, wakeref) { freed = i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_ACTIVE); + I915_SHRINK_UNBOUND); } return freed; @@ -336,7 +335,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index ef7c74cff28a..43912e9b683d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -570,7 +570,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) - return PTR_ERR(obj); + return false; mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 8a5054f21bf8..24c99d0838af 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -147,24 +147,32 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) fence = i915_active_fence_get(&tl->last_request); if (fence) { + mutex_unlock(&tl->mutex); + timeout = dma_fence_wait_timeout(fence, interruptible, timeout); dma_fence_put(fence); + + /* Retirement is best effort */ + if (!mutex_trylock(&tl->mutex)) { + active_count++; + goto out_active; + } } } if (!retire_requests(tl) || flush_submission(gt)) active_count++; + mutex_unlock(&tl->mutex); - spin_lock(&timelines->lock); +out_active: spin_lock(&timelines->lock); - /* Resume iteration after dropping lock */ + /* Resume list iteration after reacquiring spinlock */ list_safe_reset_next(tl, tn, link); if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); - mutex_unlock(&tl->mutex); /* Defer the final release to after the spinlock */ if (refcount_dec_and_test(&tl->kref.refcount)) { diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index fe8a59aaa629..940e7f7df69a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1679,11 +1679,9 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) if (!intel_engine_has_timeslices(engine)) return false; - if (list_is_last(&rq->sched.link, &engine->active.requests)) - return false; - - hint = max(rq_prio(list_next_entry(rq, sched.link)), - engine->execlists.queue_priority_hint); + hint = engine->execlists.queue_priority_hint; + if (!list_is_last(&rq->sched.link, &engine->active.requests)) + hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); return hint >= effective_prio(rq); } @@ -1725,6 +1723,18 @@ static void set_timeslice(struct intel_engine_cs *engine) set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); } +static void start_timeslice(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *execlists = &engine->execlists; + + execlists->switch_priority_hint = execlists->queue_priority_hint; + + if (timer_pending(&execlists->timer)) + return; + + set_timer_ms(&execlists->timer, timeslice(engine)); +} + static void record_preemption(struct intel_engine_execlists *execlists) { (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); @@ -1888,11 +1898,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - if (!execlists->timer.expires && - need_timeslice(engine, last)) - set_timer_ms(&execlists->timer, - timeslice(engine)); - + start_timeslice(engine); return; } } @@ -1927,7 +1933,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - return; /* leave this for another */ + start_timeslice(engine); + return; /* leave this for another sibling */ } ENGINE_TRACE(engine, diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 87716529cd2f..d8d9f1179c2b 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -192,11 +192,15 @@ static void cacheline_release(struct intel_timeline_cacheline *cl) static void cacheline_free(struct intel_timeline_cacheline *cl) { + if (!i915_active_acquire_if_busy(&cl->active)) { + __idle_cacheline_free(cl); + return; + } + GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE)); cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE); - if (i915_active_is_idle(&cl->active)) - __idle_cacheline_free(cl); + i915_active_release(&cl->active); } int intel_timeline_init(struct intel_timeline *timeline, diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 4e292d4bf7b9..173a7f2d109f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -575,24 +575,19 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - u32 val; - /* Wa_1409142259:tgl */ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); - /* Wa_1604555607:tgl */ - val = intel_uncore_read(engine->uncore, FF_MODE2); - val &= ~FF_MODE2_TDS_TIMER_MASK; - val |= FF_MODE2_TDS_TIMER_128; /* - * FIXME: FF_MODE2 register is not readable till TGL B0. We can - * enable verification of WA from the later steppings, which enables - * the read of FF_MODE2. + * Wa_1604555607:gen12 and Wa_1608008084:gen12 + * FF_MODE2 register will return the wrong value when read. The default + * value for this register is zero for all fields and there are no bit + * masks. So instead of doing a RMW we should just write the TDS timer + * value for Wa_1604555607. */ - wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val, - IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 : - FF_MODE2_TDS_TIMER_MASK); + wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128, 0); } static void diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index e1c313da6c00..a62bdf9be682 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -457,7 +457,8 @@ void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; /* TODO: add more platforms support */ - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) { if (connected) { vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 2477a1e5a166..ae139f0877ae 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -151,12 +151,12 @@ static void dmabuf_gem_object_free(struct kref *kref) dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, list); if (dmabuf_obj == obj) { + list_del(pos); intel_gvt_hypervisor_put_vfio_device(vgpu); idr_remove(&vgpu->object_idr, dmabuf_obj->dmabuf_id); kfree(dmabuf_obj->info); kfree(dmabuf_obj); - list_del(pos); break; } } diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 867e7629025b..33569b910ed5 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -147,15 +147,14 @@ static void virt_vbt_generation(struct vbt *v) /* there's features depending on version! */ v->header.version = 155; v->header.header_size = sizeof(v->header); - v->header.vbt_size = sizeof(struct vbt) - sizeof(v->header); + v->header.vbt_size = sizeof(struct vbt); v->header.bdb_offset = offsetof(struct vbt, bdb_header); strcpy(&v->bdb_header.signature[0], "BIOS_DATA_BLOCK"); v->bdb_header.version = 186; /* child_dev_size = 33 */ v->bdb_header.header_size = sizeof(v->bdb_header); - v->bdb_header.bdb_size = sizeof(struct vbt) - sizeof(struct vbt_header) - - sizeof(struct bdb_header); + v->bdb_header.bdb_size = sizeof(struct vbt) - sizeof(struct vbt_header); /* general features */ v->general_features_header.id = BDB_GENERAL_FEATURES; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 85bd9bf4f6ee..345c2aa3b491 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -272,10 +272,17 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; - mutex_lock(&vgpu->vgpu_lock); - WARN(vgpu->active, "vGPU is still active!\n"); + /* + * remove idr first so later clean can judge if need to stop + * service if no active vgpu. + */ + mutex_lock(&gvt->lock); + idr_remove(&gvt->vgpu_idr, vgpu->id); + mutex_unlock(&gvt->lock); + + mutex_lock(&vgpu->vgpu_lock); intel_gvt_debugfs_remove_vgpu(vgpu); intel_vgpu_clean_sched_policy(vgpu); intel_vgpu_clean_submission(vgpu); @@ -290,7 +297,6 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) mutex_unlock(&vgpu->vgpu_lock); mutex_lock(&gvt->lock); - idr_remove(&gvt->vgpu_idr, vgpu->id); if (idr_is_empty(&gvt->vgpu_idr)) intel_gvt_clean_irq(gvt); intel_gvt_update_vgpu_types(gvt); @@ -560,9 +566,9 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, intel_vgpu_reset_mmio(vgpu, dmlr); populate_pvinfo_page(vgpu); - intel_vgpu_reset_display(vgpu); if (dmlr) { + intel_vgpu_reset_display(vgpu); intel_vgpu_reset_cfg_space(vgpu); /* only reset the failsafe mode when dmlr reset */ vgpu->failsafe = false; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f7385abdd74b..8410330ce4f0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -56,6 +56,7 @@ #include "display/intel_hotplug.h" #include "display/intel_overlay.h" #include "display/intel_pipe_crc.h" +#include "display/intel_psr.h" #include "display/intel_sprite.h" #include "display/intel_vga.h" @@ -330,6 +331,8 @@ static int i915_driver_modeset_probe(struct drm_i915_private *i915) intel_init_ipc(i915); + intel_psr_set_force_mode_changed(i915->psr.dp); + return 0; cleanup_gem: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 077af22b8340..810e3ccd56ec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -505,7 +505,7 @@ struct i915_psr { bool dc3co_enabled; u32 dc3co_exit_delay; struct delayed_work idle_work; - bool initially_probed; + bool force_mode_changed; }; #define QUIRK_LVDS_SSC_DISABLE (1<<1) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 83f01401b8b5..f631f6d21127 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -437,7 +437,7 @@ static const struct intel_device_info snb_m_gt2_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_rps = true, \ - .ppgtt_type = INTEL_PPGTT_FULL, \ + .ppgtt_type = INTEL_PPGTT_ALIASING, \ .ppgtt_size = 31, \ IVB_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ @@ -494,7 +494,7 @@ static const struct intel_device_info vlv_info = { .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, - .ppgtt_type = INTEL_PPGTT_FULL, + .ppgtt_type = INTEL_PPGTT_ALIASING, .ppgtt_size = 31, .has_snoop = true, .has_coherent_ggtt = false, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 0f556d80ba36..3b6b913bd27a 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1954,9 +1954,10 @@ out: return i915_vma_get(oa_bo->vma); } -static int emit_oa_config(struct i915_perf_stream *stream, - struct i915_oa_config *oa_config, - struct intel_context *ce) +static struct i915_request * +emit_oa_config(struct i915_perf_stream *stream, + struct i915_oa_config *oa_config, + struct intel_context *ce) { struct i915_request *rq; struct i915_vma *vma; @@ -1964,7 +1965,7 @@ static int emit_oa_config(struct i915_perf_stream *stream, vma = get_oa_vma(stream, oa_config); if (IS_ERR(vma)) - return PTR_ERR(vma); + return ERR_CAST(vma); err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) @@ -1989,13 +1990,17 @@ static int emit_oa_config(struct i915_perf_stream *stream, err = rq->engine->emit_bb_start(rq, vma->node.start, 0, I915_DISPATCH_SECURE); + if (err) + goto err_add_request; + + i915_request_get(rq); err_add_request: i915_request_add(rq); err_vma_unpin: i915_vma_unpin(vma); err_vma_put: i915_vma_put(vma); - return err; + return err ? ERR_PTR(err) : rq; } static struct intel_context *oa_context(struct i915_perf_stream *stream) @@ -2003,7 +2008,8 @@ static struct intel_context *oa_context(struct i915_perf_stream *stream) return stream->pinned_ctx ?: stream->engine->kernel_context; } -static int hsw_enable_metric_set(struct i915_perf_stream *stream) +static struct i915_request * +hsw_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -2406,7 +2412,8 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); } -static int gen8_enable_metric_set(struct i915_perf_stream *stream) +static struct i915_request * +gen8_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; @@ -2448,7 +2455,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) */ ret = lrc_configure_all_contexts(stream, oa_config); if (ret) - return ret; + return ERR_PTR(ret); return emit_oa_config(stream, oa_config, oa_context(stream)); } @@ -2460,7 +2467,8 @@ static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) 0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); } -static int gen12_enable_metric_set(struct i915_perf_stream *stream) +static struct i915_request * +gen12_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; @@ -2491,7 +2499,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) */ ret = gen12_configure_all_contexts(stream, oa_config); if (ret) - return ret; + return ERR_PTR(ret); /* * For Gen12, performance counters are context @@ -2501,7 +2509,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) if (stream->ctx) { ret = gen12_configure_oar_context(stream, true); if (ret) - return ret; + return ERR_PTR(ret); } return emit_oa_config(stream, oa_config, oa_context(stream)); @@ -2696,6 +2704,20 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = { .read = i915_oa_read, }; +static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream) +{ + struct i915_request *rq; + + rq = stream->perf->ops.enable_metric_set(stream); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + + return 0; +} + /** * i915_oa_stream_init - validate combined props for OA stream and init * @stream: An i915 perf stream @@ -2829,7 +2851,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, stream->ops = &i915_oa_stream_ops; perf->exclusive_stream = stream; - ret = perf->ops.enable_metric_set(stream); + ret = i915_perf_stream_enable_sync(stream); if (ret) { DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; @@ -3147,7 +3169,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, return -EINVAL; if (config != stream->oa_config) { - int err; + struct i915_request *rq; /* * If OA is bound to a specific context, emit the @@ -3158,11 +3180,13 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, * When set globally, we use a low priority kernel context, * so it will effectively take effect when idle. */ - err = emit_oa_config(stream, config, oa_context(stream)); - if (err == 0) + rq = emit_oa_config(stream, config, oa_context(stream)); + if (!IS_ERR(rq)) { config = xchg(&stream->oa_config, config); - else - ret = err; + i915_request_put(rq); + } else { + ret = PTR_ERR(rq); + } } i915_oa_config_put(config); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 45e581455f5d..a0e22f00f6cf 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -339,7 +339,8 @@ struct i915_oa_ops { * counter reports being sampled. May apply system constraints such as * disabling EU clock gating as required. */ - int (*enable_metric_set)(struct i915_perf_stream *stream); + struct i915_request * + (*enable_metric_set)(struct i915_perf_stream *stream); /** * @disable_metric_set: Remove system constraints associated with using diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index ec0299490dd4..aa729d04abe2 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -822,11 +822,6 @@ static ssize_t i915_pmu_event_show(struct device *dev, return sprintf(buf, "config=0x%lx\n", eattr->val); } -static struct attribute_group i915_pmu_events_attr_group = { - .name = "events", - /* Patch in attrs at runtime. */ -}; - static ssize_t i915_pmu_get_attr_cpumask(struct device *dev, struct device_attribute *attr, @@ -846,13 +841,6 @@ static const struct attribute_group i915_pmu_cpumask_attr_group = { .attrs = i915_cpumask_attrs, }; -static const struct attribute_group *i915_pmu_attr_groups[] = { - &i915_pmu_format_attr_group, - &i915_pmu_events_attr_group, - &i915_pmu_cpumask_attr_group, - NULL -}; - #define __event(__config, __name, __unit) \ { \ .config = (__config), \ @@ -1026,23 +1014,23 @@ err_alloc: static void free_event_attributes(struct i915_pmu *pmu) { - struct attribute **attr_iter = i915_pmu_events_attr_group.attrs; + struct attribute **attr_iter = pmu->events_attr_group.attrs; for (; *attr_iter; attr_iter++) kfree((*attr_iter)->name); - kfree(i915_pmu_events_attr_group.attrs); + kfree(pmu->events_attr_group.attrs); kfree(pmu->i915_attr); kfree(pmu->pmu_attr); - i915_pmu_events_attr_group.attrs = NULL; + pmu->events_attr_group.attrs = NULL; pmu->i915_attr = NULL; pmu->pmu_attr = NULL; } static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) { - struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); GEM_BUG_ON(!pmu->base.event_init); @@ -1055,7 +1043,7 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) { - struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); unsigned int target; GEM_BUG_ON(!pmu->base.event_init); @@ -1072,8 +1060,6 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) return 0; } -static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; - static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) { enum cpuhp_state slot; @@ -1087,21 +1073,22 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) return ret; slot = ret; - ret = cpuhp_state_add_instance(slot, &pmu->node); + ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node); if (ret) { cpuhp_remove_multi_state(slot); return ret; } - cpuhp_slot = slot; + pmu->cpuhp.slot = slot; return 0; } static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) { - WARN_ON(cpuhp_slot == CPUHP_INVALID); - WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &pmu->node)); - cpuhp_remove_multi_state(cpuhp_slot); + WARN_ON(pmu->cpuhp.slot == CPUHP_INVALID); + WARN_ON(cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node)); + cpuhp_remove_multi_state(pmu->cpuhp.slot); + pmu->cpuhp.slot = CPUHP_INVALID; } static bool is_igp(struct drm_i915_private *i915) @@ -1118,6 +1105,13 @@ static bool is_igp(struct drm_i915_private *i915) void i915_pmu_register(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; + const struct attribute_group *attr_groups[] = { + &i915_pmu_format_attr_group, + &pmu->events_attr_group, + &i915_pmu_cpumask_attr_group, + NULL + }; + int ret = -ENOMEM; if (INTEL_GEN(i915) <= 2) { @@ -1128,6 +1122,7 @@ void i915_pmu_register(struct drm_i915_private *i915) spin_lock_init(&pmu->lock); hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; + pmu->cpuhp.slot = CPUHP_INVALID; if (!is_igp(i915)) { pmu->name = kasprintf(GFP_KERNEL, @@ -1143,11 +1138,16 @@ void i915_pmu_register(struct drm_i915_private *i915) if (!pmu->name) goto err; - i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); - if (!i915_pmu_events_attr_group.attrs) + pmu->events_attr_group.name = "events"; + pmu->events_attr_group.attrs = create_event_attributes(pmu); + if (!pmu->events_attr_group.attrs) goto err_name; - pmu->base.attr_groups = i915_pmu_attr_groups; + pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups), + GFP_KERNEL); + if (!pmu->base.attr_groups) + goto err_attr; + pmu->base.task_ctx_nr = perf_invalid_context; pmu->base.event_init = i915_pmu_event_init; pmu->base.add = i915_pmu_event_add; @@ -1159,7 +1159,7 @@ void i915_pmu_register(struct drm_i915_private *i915) ret = perf_pmu_register(&pmu->base, pmu->name, -1); if (ret) - goto err_attr; + goto err_groups; ret = i915_pmu_register_cpuhp_state(pmu); if (ret) @@ -1169,6 +1169,8 @@ void i915_pmu_register(struct drm_i915_private *i915) err_unreg: perf_pmu_unregister(&pmu->base); +err_groups: + kfree(pmu->base.attr_groups); err_attr: pmu->base.event_init = NULL; free_event_attributes(pmu); @@ -1194,6 +1196,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915) perf_pmu_unregister(&pmu->base); pmu->base.event_init = NULL; + kfree(pmu->base.attr_groups); if (!is_igp(i915)) kfree(pmu->name); free_event_attributes(pmu); diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 6c1647c5daf2..f1d6cad0d7d5 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -39,9 +39,12 @@ struct i915_pmu_sample { struct i915_pmu { /** - * @node: List node for CPU hotplug handling. + * @cpuhp: Struct used for CPU hotplug handling. */ - struct hlist_node node; + struct { + struct hlist_node node; + enum cpuhp_state slot; + } cpuhp; /** * @base: PMU base. */ @@ -105,6 +108,10 @@ struct i915_pmu { */ ktime_t sleep_last; /** + * @events_attr_group: Device events attribute group. + */ + struct attribute_group events_attr_group; + /** * @i915_attr: Memory block holding device attributes. */ void *i915_attr; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6cc55c103f67..3575fd30756b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7757,6 +7757,7 @@ enum { #define BW_BUDDY1_CTL _MMIO(0x45140) #define BW_BUDDY2_CTL _MMIO(0x45150) #define BW_BUDDY_DISABLE REG_BIT(31) +#define BW_BUDDY_TLB_REQ_TIMER_MASK REG_GENMASK(21, 16) #define BW_BUDDY1_PAGE_MASK _MMIO(0x45144) #define BW_BUDDY2_PAGE_MASK _MMIO(0x45154) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f56b046a32de..a18b2a244706 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -275,7 +275,7 @@ bool i915_request_retire(struct i915_request *rq) spin_unlock_irq(&rq->lock); remove_from_client(rq); - list_del(&rq->link); + list_del_rcu(&rq->link); intel_context_exit(rq->context); intel_context_unpin(rq->context); @@ -527,19 +527,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } +static void irq_semaphore_cb(struct irq_work *wrk) +{ + struct i915_request *rq = + container_of(wrk, typeof(*rq), semaphore_work); + + i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE); + i915_request_put(rq); +} + static int __i915_sw_fence_call semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { - struct i915_request *request = - container_of(fence, typeof(*request), semaphore); + struct i915_request *rq = container_of(fence, typeof(*rq), semaphore); switch (state) { case FENCE_COMPLETE: - i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE); + if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) { + i915_request_get(rq); + init_irq_work(&rq->semaphore_work, irq_semaphore_cb); + irq_work_queue(&rq->semaphore_work); + } break; case FENCE_FREE: - i915_request_put(request); + i915_request_put(rq); break; } @@ -721,6 +733,8 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->infix = rq->ring->emit; /* end of header; start of user payload */ intel_context_mark_active(ce); + list_add_tail_rcu(&rq->link, &tl->requests); + return rq; err_unwind: @@ -774,16 +788,26 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) struct dma_fence *fence; int err; - GEM_BUG_ON(i915_request_timeline(rq) == - rcu_access_pointer(signal->timeline)); + if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline)) + return 0; + + if (i915_request_started(signal)) + return 0; fence = NULL; rcu_read_lock(); spin_lock_irq(&signal->lock); - if (!i915_request_started(signal) && - !list_is_first(&signal->link, - &rcu_dereference(signal->timeline)->requests)) { - struct i915_request *prev = list_prev_entry(signal, link); + do { + struct list_head *pos = READ_ONCE(signal->link.prev); + struct i915_request *prev; + + /* Confirm signal has not been retired, the link is valid */ + if (unlikely(i915_request_started(signal))) + break; + + /* Is signal the earliest request on its timeline? */ + if (pos == &rcu_dereference(signal->timeline)->requests) + break; /* * Peek at the request before us in the timeline. That @@ -791,20 +815,25 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) * after acquiring a reference to it, confirm that it is * still part of the signaler's timeline. */ - if (i915_request_get_rcu(prev)) { - if (list_next_entry(prev, link) == signal) - fence = &prev->fence; - else - i915_request_put(prev); + prev = list_entry(pos, typeof(*prev), link); + if (!i915_request_get_rcu(prev)) + break; + + /* After the strong barrier, confirm prev is still attached */ + if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) { + i915_request_put(prev); + break; } - } + + fence = &prev->fence; + } while (0); spin_unlock_irq(&signal->lock); rcu_read_unlock(); if (!fence) return 0; err = 0; - if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) + if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) err = i915_sw_fence_await_dma_fence(&rq->submit, fence, 0, I915_FENCE_GFP); @@ -1242,8 +1271,6 @@ __i915_request_add_to_timeline(struct i915_request *rq) 0); } - list_add_tail(&rq->link, &timeline->requests); - /* * Make sure that no request gazumped us - if it was allocated after * our i915_request_alloc() and called __i915_request_add() before @@ -1303,9 +1330,9 @@ void __i915_request_queue(struct i915_request *rq, * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ - i915_sw_fence_commit(&rq->semaphore); if (attr && rq->engine->schedule) rq->engine->schedule(rq, attr); + i915_sw_fence_commit(&rq->semaphore); i915_sw_fence_commit(&rq->submit); } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index f57eadcf3583..fccc339949ec 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -26,6 +26,7 @@ #define I915_REQUEST_H #include <linux/dma-fence.h> +#include <linux/irq_work.h> #include <linux/lockdep.h> #include "gem/i915_gem_context_types.h" @@ -208,6 +209,7 @@ struct i915_request { }; struct list_head execute_cb; struct i915_sw_fence semaphore; + struct irq_work semaphore_work; /* * A list of everyone we wait upon, and everyone who waits upon us. diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index b0ade76bec90..d34141f7dcd8 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -234,6 +234,11 @@ static inline u64 ptr_to_u64(const void *ptr) __idx; \ }) +static inline bool is_power_of_2_u64(u64 n) +{ + return (n != 0 && ((n & (n - 1)) == 0)); +} + static inline void __list_del_many(struct list_head *head, struct list_head *first) { diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 0dfcd1787e65..fe85e487e477 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -486,6 +486,7 @@ static void mtk_drm_crtc_hw_config(struct mtk_drm_crtc *mtk_crtc) } #if IS_REACHABLE(CONFIG_MTK_CMDQ) if (mtk_crtc->cmdq_client) { + mbox_flush(mtk_crtc->cmdq_client->chan, 2000); cmdq_handle = cmdq_pkt_create(mtk_crtc->cmdq_client, PAGE_SIZE); cmdq_pkt_clear_event(cmdq_handle, mtk_crtc->cmdq_event); cmdq_pkt_wfe(cmdq_handle, mtk_crtc->cmdq_event); @@ -636,10 +637,18 @@ static const struct drm_crtc_helper_funcs mtk_crtc_helper_funcs = { static int mtk_drm_crtc_init(struct drm_device *drm, struct mtk_drm_crtc *mtk_crtc, - struct drm_plane *primary, - struct drm_plane *cursor, unsigned int pipe) + unsigned int pipe) { - int ret; + struct drm_plane *primary = NULL; + struct drm_plane *cursor = NULL; + int i, ret; + + for (i = 0; i < mtk_crtc->layer_nr; i++) { + if (mtk_crtc->planes[i].type == DRM_PLANE_TYPE_PRIMARY) + primary = &mtk_crtc->planes[i]; + else if (mtk_crtc->planes[i].type == DRM_PLANE_TYPE_CURSOR) + cursor = &mtk_crtc->planes[i]; + } ret = drm_crtc_init_with_planes(drm, &mtk_crtc->base, primary, cursor, &mtk_crtc_funcs, NULL); @@ -689,11 +698,12 @@ static int mtk_drm_crtc_num_comp_planes(struct mtk_drm_crtc *mtk_crtc, } static inline -enum drm_plane_type mtk_drm_crtc_plane_type(unsigned int plane_idx) +enum drm_plane_type mtk_drm_crtc_plane_type(unsigned int plane_idx, + unsigned int num_planes) { if (plane_idx == 0) return DRM_PLANE_TYPE_PRIMARY; - else if (plane_idx == 1) + else if (plane_idx == (num_planes - 1)) return DRM_PLANE_TYPE_CURSOR; else return DRM_PLANE_TYPE_OVERLAY; @@ -712,7 +722,8 @@ static int mtk_drm_crtc_init_comp_planes(struct drm_device *drm_dev, ret = mtk_plane_init(drm_dev, &mtk_crtc->planes[mtk_crtc->layer_nr], BIT(pipe), - mtk_drm_crtc_plane_type(mtk_crtc->layer_nr), + mtk_drm_crtc_plane_type(mtk_crtc->layer_nr, + num_planes), mtk_ddp_comp_supported_rotations(comp)); if (ret) return ret; @@ -807,9 +818,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, return ret; } - ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, &mtk_crtc->planes[0], - mtk_crtc->layer_nr > 1 ? &mtk_crtc->planes[1] : - NULL, pipe); + ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, pipe); if (ret < 0) return ret; @@ -828,7 +837,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, drm_crtc_index(&mtk_crtc->base)); mtk_crtc->cmdq_client = NULL; } - ret = of_property_read_u32_index(dev->of_node, "mediatek,gce-events", + ret = of_property_read_u32_index(priv->mutex_node, + "mediatek,gce-events", drm_crtc_index(&mtk_crtc->base), &mtk_crtc->cmdq_event); if (ret) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index 1f5a112bb034..57c88de9a329 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -471,6 +471,7 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node, /* Only DMA capable components need the LARB property */ comp->larb_dev = NULL; if (type != MTK_DISP_OVL && + type != MTK_DISP_OVL_2L && type != MTK_DISP_RDMA && type != MTK_DISP_WDMA) return 0; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index 914cc7619cd7..c2bd683a87c8 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -80,6 +80,7 @@ static int mtk_plane_atomic_async_check(struct drm_plane *plane, struct drm_plane_state *state) { struct drm_crtc_state *crtc_state; + int ret; if (plane != state->crtc->cursor) return -EINVAL; @@ -90,6 +91,11 @@ static int mtk_plane_atomic_async_check(struct drm_plane *plane, if (!plane->state->fb) return -EINVAL; + ret = mtk_drm_crtc_plane_check(state->crtc, plane, + to_mtk_plane_state(state)); + if (ret) + return ret; + if (state->state) crtc_state = drm_atomic_get_existing_crtc_state(state->state, state->crtc); @@ -115,6 +121,7 @@ static void mtk_plane_atomic_async_update(struct drm_plane *plane, plane->state->src_y = new_state->src_y; plane->state->src_h = new_state->src_h; plane->state->src_w = new_state->src_w; + swap(plane->state->fb, new_state->fb); state->pending.async_dirty = true; mtk_drm_crtc_async_update(new_state->crtc, plane, new_state); diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 3107b0738e40..5d75f8cf6477 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -601,33 +601,27 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data) source_id = (fault_status >> 16); /* Page fault only */ - if ((status & mask) == BIT(i)) { - WARN_ON(exception_type < 0xC1 || exception_type > 0xC4); - + ret = -1; + if ((status & mask) == BIT(i) && (exception_type & 0xF8) == 0xC0) ret = panfrost_mmu_map_fault_addr(pfdev, i, addr); - if (!ret) { - mmu_write(pfdev, MMU_INT_CLEAR, BIT(i)); - status &= ~mask; - continue; - } - } - /* terminal fault, print info about the fault */ - dev_err(pfdev->dev, - "Unhandled Page fault in AS%d at VA 0x%016llX\n" - "Reason: %s\n" - "raw fault status: 0x%X\n" - "decoded fault status: %s\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n", - i, addr, - "TODO", - fault_status, - (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), - exception_type, panfrost_exception_name(pfdev, exception_type), - access_type, access_type_name(pfdev, fault_status), - source_id); + if (ret) + /* terminal fault, print info about the fault */ + dev_err(pfdev->dev, + "Unhandled Page fault in AS%d at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "decoded fault status: %s\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n", + i, addr, + "TODO", + fault_status, + (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), + exception_type, panfrost_exception_name(pfdev, exception_type), + access_type, access_type_name(pfdev, fault_status), + source_id); mmu_write(pfdev, MMU_INT_CLEAR, mask); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index fd74e2611185..8696af1ee14d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -37,6 +37,7 @@ #include <linux/vga_switcheroo.h> #include <linux/mmu_notifier.h> +#include <drm/drm_agpsupport.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> #include <drm/drm_fb_helper.h> @@ -325,6 +326,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned long flags = 0; + struct drm_device *dev; int ret; if (!ent) @@ -365,7 +367,44 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) return ret; - return drm_get_pci_dev(pdev, ent, &kms_driver); + dev = drm_dev_alloc(&kms_driver, &pdev->dev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = pci_enable_device(pdev); + if (ret) + goto err_free; + + dev->pdev = pdev; +#ifdef __alpha__ + dev->hose = pdev->sysdata; +#endif + + pci_set_drvdata(pdev, dev); + + if (pci_find_capability(dev->pdev, PCI_CAP_ID_AGP)) + dev->agp = drm_agp_init(dev); + if (dev->agp) { + dev->agp->agp_mtrr = arch_phys_wc_add( + dev->agp->agp_info.aper_base, + dev->agp->agp_info.aper_size * + 1024 * 1024); + } + + ret = drm_dev_register(dev, ent->driver_data); + if (ret) + goto err_agp; + + return 0; + +err_agp: + if (dev->agp) + arch_phys_wc_del(dev->agp->agp_mtrr); + kfree(dev->agp); + pci_disable_device(pdev); +err_free: + drm_dev_put(dev); + return ret; } static void @@ -575,7 +614,7 @@ radeon_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, static struct drm_driver kms_driver = { .driver_features = - DRIVER_USE_AGP | DRIVER_GEM | DRIVER_RENDER, + DRIVER_GEM | DRIVER_RENDER, .load = radeon_driver_load_kms, .open = radeon_driver_open_kms, .postclose = radeon_driver_postclose_kms, diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index d24f23a81656..dd2f19b8022b 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -32,6 +32,7 @@ #include <linux/uaccess.h> #include <linux/vga_switcheroo.h> +#include <drm/drm_agpsupport.h> #include <drm/drm_fb_helper.h> #include <drm/drm_file.h> #include <drm/drm_ioctl.h> @@ -77,6 +78,11 @@ void radeon_driver_unload_kms(struct drm_device *dev) radeon_modeset_fini(rdev); radeon_device_fini(rdev); + if (dev->agp) + arch_phys_wc_del(dev->agp->agp_mtrr); + kfree(dev->agp); + dev->agp = NULL; + done_free: kfree(rdev); dev->dev_private = NULL; diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.c b/drivers/gpu/drm/sun4i/sun8i_mixer.c index 7c24f8f832a5..4a64f7ae437a 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.c +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.c @@ -107,48 +107,128 @@ static const struct de2_fmt_info de2_formats[] = { .csc = SUN8I_CSC_MODE_OFF, }, { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_XRGB4444, + .de2_fmt = SUN8I_MIXER_FBFMT_ARGB4444, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { .drm_fmt = DRM_FORMAT_ABGR4444, .de2_fmt = SUN8I_MIXER_FBFMT_ABGR4444, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_XBGR4444, + .de2_fmt = SUN8I_MIXER_FBFMT_ABGR4444, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { .drm_fmt = DRM_FORMAT_RGBA4444, .de2_fmt = SUN8I_MIXER_FBFMT_RGBA4444, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_RGBX4444, + .de2_fmt = SUN8I_MIXER_FBFMT_RGBA4444, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { .drm_fmt = DRM_FORMAT_BGRA4444, .de2_fmt = SUN8I_MIXER_FBFMT_BGRA4444, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_BGRX4444, + .de2_fmt = SUN8I_MIXER_FBFMT_BGRA4444, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { .drm_fmt = DRM_FORMAT_ARGB1555, .de2_fmt = SUN8I_MIXER_FBFMT_ARGB1555, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_XRGB1555, + .de2_fmt = SUN8I_MIXER_FBFMT_ARGB1555, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { .drm_fmt = DRM_FORMAT_ABGR1555, .de2_fmt = SUN8I_MIXER_FBFMT_ABGR1555, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_XBGR1555, + .de2_fmt = SUN8I_MIXER_FBFMT_ABGR1555, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { .drm_fmt = DRM_FORMAT_RGBA5551, .de2_fmt = SUN8I_MIXER_FBFMT_RGBA5551, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_RGBX5551, + .de2_fmt = SUN8I_MIXER_FBFMT_RGBA5551, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { .drm_fmt = DRM_FORMAT_BGRA5551, .de2_fmt = SUN8I_MIXER_FBFMT_BGRA5551, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_BGRX5551, + .de2_fmt = SUN8I_MIXER_FBFMT_BGRA5551, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { + .drm_fmt = DRM_FORMAT_ARGB2101010, + .de2_fmt = SUN8I_MIXER_FBFMT_ARGB2101010, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { + .drm_fmt = DRM_FORMAT_ABGR2101010, + .de2_fmt = SUN8I_MIXER_FBFMT_ABGR2101010, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { + .drm_fmt = DRM_FORMAT_RGBA1010102, + .de2_fmt = SUN8I_MIXER_FBFMT_RGBA1010102, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { + .drm_fmt = DRM_FORMAT_BGRA1010102, + .de2_fmt = SUN8I_MIXER_FBFMT_BGRA1010102, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { .drm_fmt = DRM_FORMAT_UYVY, .de2_fmt = SUN8I_MIXER_FBFMT_UYVY, .rgb = false, @@ -197,12 +277,6 @@ static const struct de2_fmt_info de2_formats[] = { .csc = SUN8I_CSC_MODE_YUV2RGB, }, { - .drm_fmt = DRM_FORMAT_YUV444, - .de2_fmt = SUN8I_MIXER_FBFMT_RGB888, - .rgb = true, - .csc = SUN8I_CSC_MODE_YUV2RGB, - }, - { .drm_fmt = DRM_FORMAT_YUV422, .de2_fmt = SUN8I_MIXER_FBFMT_YUV422, .rgb = false, @@ -221,12 +295,6 @@ static const struct de2_fmt_info de2_formats[] = { .csc = SUN8I_CSC_MODE_YUV2RGB, }, { - .drm_fmt = DRM_FORMAT_YVU444, - .de2_fmt = SUN8I_MIXER_FBFMT_RGB888, - .rgb = true, - .csc = SUN8I_CSC_MODE_YVU2RGB, - }, - { .drm_fmt = DRM_FORMAT_YVU422, .de2_fmt = SUN8I_MIXER_FBFMT_YUV422, .rgb = false, @@ -244,6 +312,18 @@ static const struct de2_fmt_info de2_formats[] = { .rgb = false, .csc = SUN8I_CSC_MODE_YVU2RGB, }, + { + .drm_fmt = DRM_FORMAT_P010, + .de2_fmt = SUN8I_MIXER_FBFMT_P010_YUV, + .rgb = false, + .csc = SUN8I_CSC_MODE_YUV2RGB, + }, + { + .drm_fmt = DRM_FORMAT_P210, + .de2_fmt = SUN8I_MIXER_FBFMT_P210_YUV, + .rgb = false, + .csc = SUN8I_CSC_MODE_YUV2RGB, + }, }; const struct de2_fmt_info *sun8i_mixer_format_info(u32 format) diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h index c6cc94057faf..345b28b0a80a 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.h +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h @@ -93,6 +93,10 @@ #define SUN8I_MIXER_FBFMT_ABGR1555 17 #define SUN8I_MIXER_FBFMT_RGBA5551 18 #define SUN8I_MIXER_FBFMT_BGRA5551 19 +#define SUN8I_MIXER_FBFMT_ARGB2101010 20 +#define SUN8I_MIXER_FBFMT_ABGR2101010 21 +#define SUN8I_MIXER_FBFMT_RGBA1010102 22 +#define SUN8I_MIXER_FBFMT_BGRA1010102 23 #define SUN8I_MIXER_FBFMT_YUYV 0 #define SUN8I_MIXER_FBFMT_UYVY 1 @@ -109,6 +113,13 @@ /* format 12 is semi-planar YUV411 UVUV */ /* format 13 is semi-planar YUV411 VUVU */ #define SUN8I_MIXER_FBFMT_YUV411 14 +/* format 15 doesn't exist */ +/* format 16 is P010 YVU */ +#define SUN8I_MIXER_FBFMT_P010_YUV 17 +/* format 18 is P210 YVU */ +#define SUN8I_MIXER_FBFMT_P210_YUV 19 +/* format 20 is packed YVU444 10-bit */ +/* format 21 is packed YUV444 10-bit */ /* * Sub-engines listed bellow are unused for now. The EN registers are here only diff --git a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c index 42d445d23773..b8398ca18b0f 100644 --- a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c +++ b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c @@ -398,24 +398,66 @@ static const struct drm_plane_funcs sun8i_vi_layer_funcs = { }; /* - * While all RGB formats are supported, VI planes don't support - * alpha blending, so there is no point having formats with alpha - * channel if their opaque analog exist. + * While DE2 VI layer supports same RGB formats as UI layer, alpha + * channel is ignored. This structure lists all unique variants + * where alpha channel is replaced with "don't care" (X) channel. */ static const u32 sun8i_vi_layer_formats[] = { + DRM_FORMAT_BGR565, + DRM_FORMAT_BGR888, + DRM_FORMAT_BGRX4444, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_BGRX8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGB888, + DRM_FORMAT_RGBX4444, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_RGBX8888, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_XBGR4444, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_XRGB4444, + DRM_FORMAT_XRGB8888, + + DRM_FORMAT_NV16, + DRM_FORMAT_NV12, + DRM_FORMAT_NV21, + DRM_FORMAT_NV61, + DRM_FORMAT_UYVY, + DRM_FORMAT_VYUY, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_YUV411, + DRM_FORMAT_YUV420, + DRM_FORMAT_YUV422, + DRM_FORMAT_YVU411, + DRM_FORMAT_YVU420, + DRM_FORMAT_YVU422, +}; + +static const u32 sun8i_vi_layer_de3_formats[] = { DRM_FORMAT_ABGR1555, + DRM_FORMAT_ABGR2101010, DRM_FORMAT_ABGR4444, + DRM_FORMAT_ABGR8888, DRM_FORMAT_ARGB1555, + DRM_FORMAT_ARGB2101010, DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB8888, DRM_FORMAT_BGR565, DRM_FORMAT_BGR888, + DRM_FORMAT_BGRA1010102, DRM_FORMAT_BGRA5551, DRM_FORMAT_BGRA4444, + DRM_FORMAT_BGRA8888, DRM_FORMAT_BGRX8888, DRM_FORMAT_RGB565, DRM_FORMAT_RGB888, + DRM_FORMAT_RGBA1010102, DRM_FORMAT_RGBA4444, DRM_FORMAT_RGBA5551, + DRM_FORMAT_RGBA8888, DRM_FORMAT_RGBX8888, DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888, @@ -424,6 +466,8 @@ static const u32 sun8i_vi_layer_formats[] = { DRM_FORMAT_NV12, DRM_FORMAT_NV21, DRM_FORMAT_NV61, + DRM_FORMAT_P010, + DRM_FORMAT_P210, DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, @@ -431,11 +475,9 @@ static const u32 sun8i_vi_layer_formats[] = { DRM_FORMAT_YUV411, DRM_FORMAT_YUV420, DRM_FORMAT_YUV422, - DRM_FORMAT_YUV444, DRM_FORMAT_YVU411, DRM_FORMAT_YVU420, DRM_FORMAT_YVU422, - DRM_FORMAT_YVU444, }; struct sun8i_vi_layer *sun8i_vi_layer_init_one(struct drm_device *drm, @@ -443,19 +485,27 @@ struct sun8i_vi_layer *sun8i_vi_layer_init_one(struct drm_device *drm, int index) { u32 supported_encodings, supported_ranges; + unsigned int plane_cnt, format_count; struct sun8i_vi_layer *layer; - unsigned int plane_cnt; + const u32 *formats; int ret; layer = devm_kzalloc(drm->dev, sizeof(*layer), GFP_KERNEL); if (!layer) return ERR_PTR(-ENOMEM); + if (mixer->cfg->is_de3) { + formats = sun8i_vi_layer_de3_formats; + format_count = ARRAY_SIZE(sun8i_vi_layer_de3_formats); + } else { + formats = sun8i_vi_layer_formats; + format_count = ARRAY_SIZE(sun8i_vi_layer_formats); + } + /* possible crtcs are set later */ ret = drm_universal_plane_init(drm, &layer->plane, 0, &sun8i_vi_layer_funcs, - sun8i_vi_layer_formats, - ARRAY_SIZE(sun8i_vi_layer_formats), + formats, format_count, NULL, DRM_PLANE_TYPE_OVERLAY, NULL); if (ret) { dev_err(drm->dev, "Couldn't initialize layer\n"); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 49ed55779128..953c82a4f573 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -515,6 +515,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base.base.resv = &fbo->base.base._resv; dma_resv_init(&fbo->base.base._resv); + fbo->base.base.dev = NULL; ret = dma_resv_trylock(&fbo->base.base._resv); WARN_ON(!ret); diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 017a9e0fc3bb..3af7ec80c7da 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -42,8 +42,8 @@ static int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, * "f91a9dd35715 Fix unlinking resources from hash * table." (Feb 2019) fixes the bug. */ - static int handle; - handle++; + static atomic_t seqno = ATOMIC_INIT(0); + int handle = atomic_inc_return(&seqno); *resid = handle + 1; } else { int handle = ida_alloc(&vgdev->resource_ida, GFP_KERNEL); @@ -99,6 +99,7 @@ struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev, return NULL; bo->base.base.funcs = &virtio_gpu_gem_funcs; + bo->base.map_cached = true; return &bo->base.base; } diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index dddfca555df9..0b6ee1dee625 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -193,8 +193,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device, goto cleanup; /* The pointer is not NULL when we resume from hibernation */ - if (input_device->hid_desc != NULL) - kfree(input_device->hid_desc); + kfree(input_device->hid_desc); input_device->hid_desc = kmemdup(desc, desc->bLength, GFP_ATOMIC); if (!input_device->hid_desc) @@ -207,8 +206,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device, } /* The pointer is not NULL when we resume from hibernation */ - if (input_device->report_desc != NULL) - kfree(input_device->report_desc); + kfree(input_device->report_desc); input_device->report_desc = kzalloc(input_device->report_desc_size, GFP_ATOMIC); diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c index 9632e2e3c4bb..319a0519ebdb 100644 --- a/drivers/hwmon/adt7462.c +++ b/drivers/hwmon/adt7462.c @@ -413,7 +413,7 @@ static int ADT7462_REG_VOLT(struct adt7462_data *data, int which) return 0x95; break; } - return -ENODEV; + return 0; } /* Provide labels for sysfs */ diff --git a/drivers/hwmon/pmbus/xdpe12284.c b/drivers/hwmon/pmbus/xdpe12284.c index ecd9b65627ec..660556b89e9f 100644 --- a/drivers/hwmon/pmbus/xdpe12284.c +++ b/drivers/hwmon/pmbus/xdpe12284.c @@ -18,6 +18,59 @@ #define XDPE122_AMD_625MV 0x10 /* AMD mode 6.25mV */ #define XDPE122_PAGE_NUM 2 +static int xdpe122_read_word_data(struct i2c_client *client, int page, int reg) +{ + const struct pmbus_driver_info *info = pmbus_get_driver_info(client); + long val; + s16 exponent; + s32 mantissa; + int ret; + + switch (reg) { + case PMBUS_VOUT_OV_FAULT_LIMIT: + case PMBUS_VOUT_UV_FAULT_LIMIT: + ret = pmbus_read_word_data(client, page, reg); + if (ret < 0) + return ret; + + /* Convert register value to LINEAR11 data. */ + exponent = ((s16)ret) >> 11; + mantissa = ((s16)((ret & GENMASK(10, 0)) << 5)) >> 5; + val = mantissa * 1000L; + if (exponent >= 0) + val <<= exponent; + else + val >>= -exponent; + + /* Convert data to VID register. */ + switch (info->vrm_version[page]) { + case vr13: + if (val >= 500) + return 1 + DIV_ROUND_CLOSEST(val - 500, 10); + return 0; + case vr12: + if (val >= 250) + return 1 + DIV_ROUND_CLOSEST(val - 250, 5); + return 0; + case imvp9: + if (val >= 200) + return 1 + DIV_ROUND_CLOSEST(val - 200, 10); + return 0; + case amd625mv: + if (val >= 200 && val <= 1550) + return DIV_ROUND_CLOSEST((1550 - val) * 100, + 625); + return 0; + default: + return -EINVAL; + } + default: + return -ENODATA; + } + + return 0; +} + static int xdpe122_identify(struct i2c_client *client, struct pmbus_driver_info *info) { @@ -70,6 +123,7 @@ static struct pmbus_driver_info xdpe122_info = { PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP | PMBUS_HAVE_POUT | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT, .identify = xdpe122_identify, + .read_word_data = xdpe122_read_word_data, }; static int xdpe122_probe(struct i2c_client *client, diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c index 5255d3755411..1de23b4f3809 100644 --- a/drivers/i2c/busses/i2c-altera.c +++ b/drivers/i2c/busses/i2c-altera.c @@ -171,7 +171,7 @@ static void altr_i2c_init(struct altr_i2c_dev *idev) /* SCL Low Time */ writel(t_low, idev->base + ALTR_I2C_SCL_LOW); /* SDA Hold Time, 300ns */ - writel(div_u64(300 * clk_mhz, 1000), idev->base + ALTR_I2C_SDA_HOLD); + writel(3 * clk_mhz / 10, idev->base + ALTR_I2C_SDA_HOLD); /* Mask all master interrupt bits */ altr_i2c_int_enable(idev, ALTR_I2C_ALL_IRQ, false); diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 16a67a64284a..b426fc956938 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -78,25 +78,6 @@ #define X1000_I2C_DC_STOP BIT(9) -static const char * const jz4780_i2c_abrt_src[] = { - "ABRT_7B_ADDR_NOACK", - "ABRT_10ADDR1_NOACK", - "ABRT_10ADDR2_NOACK", - "ABRT_XDATA_NOACK", - "ABRT_GCALL_NOACK", - "ABRT_GCALL_READ", - "ABRT_HS_ACKD", - "SBYTE_ACKDET", - "ABRT_HS_NORSTRT", - "SBYTE_NORSTRT", - "ABRT_10B_RD_NORSTRT", - "ABRT_MASTER_DIS", - "ARB_LOST", - "SLVFLUSH_TXFIFO", - "SLV_ARBLOST", - "SLVRD_INTX", -}; - #define JZ4780_I2C_INTST_IGC BIT(11) #define JZ4780_I2C_INTST_ISTT BIT(10) #define JZ4780_I2C_INTST_ISTP BIT(9) @@ -576,21 +557,8 @@ done: static void jz4780_i2c_txabrt(struct jz4780_i2c *i2c, int src) { - int i; - - dev_err(&i2c->adap.dev, "txabrt: 0x%08x\n", src); - dev_err(&i2c->adap.dev, "device addr=%x\n", - jz4780_i2c_readw(i2c, JZ4780_I2C_TAR)); - dev_err(&i2c->adap.dev, "send cmd count:%d %d\n", - i2c->cmd, i2c->cmd_buf[i2c->cmd]); - dev_err(&i2c->adap.dev, "receive data count:%d %d\n", - i2c->cmd, i2c->data_buf[i2c->cmd]); - - for (i = 0; i < 16; i++) { - if (src & BIT(i)) - dev_dbg(&i2c->adap.dev, "I2C TXABRT[%d]=%s\n", - i, jz4780_i2c_abrt_src[i]); - } + dev_dbg(&i2c->adap.dev, "txabrt: 0x%08x, cmd: %d, send: %d, recv: %d\n", + src, i2c->cmd, i2c->cmd_buf[i2c->cmd], i2c->data_buf[i2c->cmd]); } static inline int jz4780_i2c_xfer_read(struct jz4780_i2c *i2c, diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 1bb99b556393..05c26986637b 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -361,7 +361,7 @@ static const struct block_device_operations ide_gd_ops = { .release = ide_gd_release, .ioctl = ide_gd_ioctl, #ifdef CONFIG_COMPAT - .ioctl = ide_gd_compat_ioctl, + .compat_ioctl = ide_gd_compat_ioctl, #endif .getgeo = ide_gd_getgeo, .check_events = ide_gd_check_events, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 68cc1b2d6824..15e99a888427 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1191,6 +1191,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, /* Sharing an ib_cm_id with different handlers is not * supported */ spin_unlock_irqrestore(&cm.lock, flags); + ib_destroy_cm_id(cm_id); return ERR_PTR(-EINVAL); } refcount_inc(&cm_id_priv->refcount); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 72f032160c4b..2dec3a02ab9f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3212,19 +3212,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, int ret; id_priv = container_of(id, struct rdma_id_private, id); + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) + if (ret) { + memset(cma_dst_addr(id_priv), 0, + rdma_addr_size(dst_addr)); return ret; + } } - if (cma_family(id_priv) != dst_addr->sa_family) + if (cma_family(id_priv) != dst_addr->sa_family) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); } else { diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index b1457b3464d3..cf42acca4a3a 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -338,6 +338,20 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->pd = pd; qp->uobject = uobj; qp->real_qp = qp; + + qp->qp_type = attr->qp_type; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->send_cq = attr->send_cq; + qp->recv_cq = attr->recv_cq; + qp->srq = attr->srq; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->event_handler = attr->event_handler; + + atomic_set(&qp->usecnt, 0); + spin_lock_init(&qp->mr_lock); + INIT_LIST_HEAD(&qp->rdma_mrs); + INIT_LIST_HEAD(&qp->sig_mrs); + /* * We don't track XRC QPs for now, because they don't have PD * and more importantly they are created internaly by driver, diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index ade71823370f..da8adadf4755 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -159,8 +159,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; - list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { + list_del(e); kfree(list_entry(e, struct iwcm_work, free_list)); + } } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 37b433aa7306..e0b0a91da696 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1757,6 +1757,8 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (ret) goto err_msg; } else { + if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) + goto err_msg; qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 4fad732f9b3c..06e5b6787443 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -273,6 +273,23 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return 1; } +static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, + u32 sg_cnt, enum dma_data_direction dir) +{ + if (is_pci_p2pdma_page(sg_page(sg))) + pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir); + else + ib_dma_unmap_sg(dev, sg, sg_cnt, dir); +} + +static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, + u32 sg_cnt, enum dma_data_direction dir) +{ + if (is_pci_p2pdma_page(sg_page(sg))) + return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + return ib_dma_map_sg(dev, sg, sg_cnt, dir); +} + /** * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context * @ctx: context to initialize @@ -295,11 +312,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct ib_device *dev = qp->pd->device; int ret; - if (is_pci_p2pdma_page(sg_page(sg))) - ret = pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); - else - ret = ib_dma_map_sg(dev, sg, sg_cnt, dir); - + ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); if (!ret) return -ENOMEM; sg_cnt = ret; @@ -338,7 +351,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, return ret; out_unmap_sg: - ib_dma_unmap_sg(dev, sg, sg_cnt, dir); + rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_init); @@ -588,11 +601,7 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, break; } - if (is_pci_p2pdma_page(sg_page(sg))) - pci_p2pdma_unmap_sg(qp->pd->device->dma_device, sg, - sg_cnt, dir); - else - ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy); diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 2b4d80393bd0..2d5608315dc8 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -340,15 +340,19 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, return NULL; if (qp_attr_mask & IB_QP_PORT) - new_pps->main.port_num = - (qp_pps) ? qp_pps->main.port_num : qp_attr->port_num; + new_pps->main.port_num = qp_attr->port_num; + else if (qp_pps) + new_pps->main.port_num = qp_pps->main.port_num; + if (qp_attr_mask & IB_QP_PKEY_INDEX) - new_pps->main.pkey_index = (qp_pps) ? qp_pps->main.pkey_index : - qp_attr->pkey_index; + new_pps->main.pkey_index = qp_attr->pkey_index; + else if (qp_pps) + new_pps->main.pkey_index = qp_pps->main.pkey_index; + if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; - if (!(qp_attr_mask & (IB_QP_PKEY_INDEX || IB_QP_PORT)) && qp_pps) { + if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) { new_pps->main.port_num = qp_pps->main.port_num; new_pps->main.pkey_index = qp_pps->main.pkey_index; if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index b8c657b28380..cd656ad4953b 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -181,14 +181,28 @@ ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr, odp_data->page_shift = PAGE_SHIFT; odp_data->notifier.ops = ops; + /* + * A mmget must be held when registering a notifier, the owming_mm only + * has a mm_grab at this point. + */ + if (!mmget_not_zero(umem->owning_mm)) { + ret = -EFAULT; + goto out_free; + } + odp_data->tgid = get_pid(root->tgid); ret = ib_init_umem_odp(odp_data, ops); - if (ret) { - put_pid(odp_data->tgid); - kfree(odp_data); - return ERR_PTR(ret); - } + if (ret) + goto out_tgid; + mmput(umem->owning_mm); return odp_data; + +out_tgid: + put_pid(odp_data->tgid); + mmput(umem->owning_mm); +out_free: + kfree(odp_data); + return ERR_PTR(ret); } EXPORT_SYMBOL(ib_umem_odp_alloc_child); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 025933752e1d..060b4ebbd2ba 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1445,16 +1445,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - qp->pd = pd; - qp->send_cq = attr.send_cq; - qp->recv_cq = attr.recv_cq; - qp->srq = attr.srq; - qp->rwq_ind_tbl = ind_tbl; - qp->event_handler = attr.event_handler; - qp->qp_type = attr.qp_type; - atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); - qp->port = 0; if (attr.send_cq) atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3ebae3b65c28..e62c9dfc7837 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1185,16 +1185,6 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, if (ret) goto err; - qp->qp_type = qp_init_attr->qp_type; - qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; - - atomic_set(&qp->usecnt, 0); - qp->mrs_used = 0; - spin_lock_init(&qp->mr_lock); - INIT_LIST_HEAD(&qp->rdma_mrs); - INIT_LIST_HEAD(&qp->sig_mrs); - qp->port = 0; - if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { struct ib_qp *xrc_qp = create_xrc_qp_user(qp, qp_init_attr, udata); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 089e201d7550..2f6323ad9c59 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -515,10 +515,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, opa_get_lid(packet->dlid, 9B)); if (!mcast) goto drop; + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; if (hfi1_do_pkey_check(packet)) - goto drop; + goto unlock_drop; spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -527,6 +528,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); } + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e4bcfa81b70a..4de380f3d581 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3570,7 +3570,8 @@ static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); } else { misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d9bffcc93587..bb78142bca5e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -636,6 +636,7 @@ struct mlx5_ib_mr { /* For ODP and implicit */ atomic_t num_deferred_work; + wait_queue_head_t q_deferred_work; struct xarray implicit_children; union { struct rcu_head rcu; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4216814ba871..bf50cd91f472 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -235,7 +235,8 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) mr->parent = NULL; mlx5_mr_cache_free(mr->dev, mr); ib_umem_odp_release(odp); - atomic_dec(&imr->num_deferred_work); + if (atomic_dec_and_test(&imr->num_deferred_work)) + wake_up(&imr->q_deferred_work); } static void free_implicit_child_mr_work(struct work_struct *work) @@ -554,6 +555,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; atomic_set(&imr->num_deferred_work, 0); + init_waitqueue_head(&imr->q_deferred_work); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -611,10 +613,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) * under xa_lock while the child is in the xarray. Thus at this point * it is only decreasing, and all work holding it is now on the wq. */ - if (atomic_read(&imr->num_deferred_work)) { - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&imr->num_deferred_work)); - } + wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); /* * Fence the imr before we destroy the children. This allows us to @@ -645,10 +644,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&mr->dev->odp_srcu); - if (atomic_read(&mr->num_deferred_work)) { - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&mr->num_deferred_work)); - } + wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); dma_fence_odp_mr(mr); } @@ -1720,7 +1716,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - atomic_dec(&work->frags[i].mr->num_deferred_work); + if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work)) + wake_up(&work->frags[i].mr->q_deferred_work); kvfree(work); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 33778d451b82..5ef93f8f17a1 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -329,8 +329,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (mcast == NULL) goto drop; this_cpu_inc(ibp->pmastats->n_multicast_rcv); + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 96ed349c0939..5cd40fb9e20c 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -388,6 +388,9 @@ static struct siw_device *siw_device_create(struct net_device *netdev) { .max_segment_size = SZ_2G }; base_dev->num_comp_vectors = num_possible_cpus(); + xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); + xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1); + ib_set_device_ops(base_dev, &siw_device_ops); rv = ib_device_set_netdev(base_dev, netdev, 1); if (rv) @@ -415,9 +418,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; sdev->attrs.max_srq_sge = SIW_MAX_SGE; - xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); - xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1); - INIT_LIST_HEAD(&sdev->cep_list); INIT_LIST_HEAD(&sdev->qp_list); diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index f277e467156f..2c6515e3ecf1 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -445,6 +445,11 @@ struct icc_path *of_icc_get(struct device *dev, const char *name) path->name = kasprintf(GFP_KERNEL, "%s-%s", src_node->name, dst_node->name); + if (!path->name) { + kfree(path); + return ERR_PTR(-ENOMEM); + } + return path; } EXPORT_SYMBOL_GPL(of_icc_get); @@ -579,6 +584,10 @@ struct icc_path *icc_get(struct device *dev, const int src_id, const int dst_id) } path->name = kasprintf(GFP_KERNEL, "%s-%s", src->name, dst->name); + if (!path->name) { + kfree(path); + path = ERR_PTR(-ENOMEM); + } out: mutex_unlock(&icc_lock); return path; diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index 8c744578122a..a0d87ed9da69 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -300,9 +300,11 @@ static int control_loop(void *dummy) /* i2c probing and setup */ /************************************************************************/ -static int -do_attach( struct i2c_adapter *adapter ) +static void do_attach(struct i2c_adapter *adapter) { + struct i2c_board_info info = { }; + struct device_node *np; + /* scan 0x48-0x4f (DS1775) and 0x2c-2x2f (ADM1030) */ static const unsigned short scan_ds1775[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, @@ -313,25 +315,24 @@ do_attach( struct i2c_adapter *adapter ) I2C_CLIENT_END }; - if( strncmp(adapter->name, "uni-n", 5) ) - return 0; - - if( !x.running ) { - struct i2c_board_info info; + if (x.running || strncmp(adapter->name, "uni-n", 5)) + return; - memset(&info, 0, sizeof(struct i2c_board_info)); - strlcpy(info.type, "therm_ds1775", I2C_NAME_SIZE); + np = of_find_compatible_node(adapter->dev.of_node, NULL, "MAC,ds1775"); + if (np) { + of_node_put(np); + } else { + strlcpy(info.type, "MAC,ds1775", I2C_NAME_SIZE); i2c_new_probed_device(adapter, &info, scan_ds1775, NULL); + } - strlcpy(info.type, "therm_adm1030", I2C_NAME_SIZE); + np = of_find_compatible_node(adapter->dev.of_node, NULL, "MAC,adm1030"); + if (np) { + of_node_put(np); + } else { + strlcpy(info.type, "MAC,adm1030", I2C_NAME_SIZE); i2c_new_probed_device(adapter, &info, scan_adm1030, NULL); - - if( x.thermostat && x.fan ) { - x.running = 1; - x.poll_task = kthread_run(control_loop, NULL, "g4fand"); - } } - return 0; } static int @@ -404,8 +405,8 @@ out: enum chip { ds1775, adm1030 }; static const struct i2c_device_id therm_windtunnel_id[] = { - { "therm_ds1775", ds1775 }, - { "therm_adm1030", adm1030 }, + { "MAC,ds1775", ds1775 }, + { "MAC,adm1030", adm1030 }, { } }; MODULE_DEVICE_TABLE(i2c, therm_windtunnel_id); @@ -414,6 +415,7 @@ static int do_probe(struct i2c_client *cl, const struct i2c_device_id *id) { struct i2c_adapter *adapter = cl->adapter; + int ret = 0; if( !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_WRITE_BYTE) ) @@ -421,11 +423,19 @@ do_probe(struct i2c_client *cl, const struct i2c_device_id *id) switch (id->driver_data) { case adm1030: - return attach_fan( cl ); + ret = attach_fan(cl); + break; case ds1775: - return attach_thermostat(cl); + ret = attach_thermostat(cl); + break; } - return 0; + + if (!x.running && x.thermostat && x.fan) { + x.running = 1; + x.poll_task = kthread_run(control_loop, NULL, "g4fand"); + } + + return ret; } static struct i2c_driver g4fan_driver = { diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 8bc1faf71ff2..a1df0d95151c 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -67,7 +67,6 @@ #include <linux/blkdev.h> #include <linux/kthread.h> #include <linux/random.h> -#include <linux/sched/signal.h> #include <trace/events/bcache.h> #define MAX_OPEN_BUCKETS 128 @@ -734,21 +733,8 @@ int bch_open_buckets_alloc(struct cache_set *c) int bch_cache_allocator_start(struct cache *ca) { - struct task_struct *k; - - /* - * In case previous btree check operation occupies too many - * system memory for bcache btree node cache, and the - * registering process is selected by OOM killer. Here just - * ignore the SIGKILL sent by OOM killer if there is, to - * avoid kthread_run() being failed by pending signals. The - * bcache registering process will exit after the registration - * done. - */ - if (signal_pending(current)) - flush_signals(current); - - k = kthread_run(bch_allocator_thread, ca, "bcache_allocator"); + struct task_struct *k = kthread_run(bch_allocator_thread, + ca, "bcache_allocator"); if (IS_ERR(k)) return PTR_ERR(k); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index b12186c87f52..fa872df4e770 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -34,7 +34,6 @@ #include <linux/random.h> #include <linux/rcupdate.h> #include <linux/sched/clock.h> -#include <linux/sched/signal.h> #include <linux/rculist.h> #include <linux/delay.h> #include <trace/events/bcache.h> @@ -1914,18 +1913,6 @@ static int bch_gc_thread(void *arg) int bch_gc_thread_start(struct cache_set *c) { - /* - * In case previous btree check operation occupies too many - * system memory for bcache btree node cache, and the - * registering process is selected by OOM killer. Here just - * ignore the SIGKILL sent by OOM killer if there is, to - * avoid kthread_run() being failed by pending signals. The - * bcache registering process will exit after the registration - * done. - */ - if (signal_pending(current)) - flush_signals(current); - c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc"); return PTR_ERR_OR_ZERO(c->gc_thread); } diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h index c82578af56a5..2ea0360108e1 100644 --- a/drivers/md/dm-bio-record.h +++ b/drivers/md/dm-bio-record.h @@ -20,8 +20,13 @@ struct dm_bio_details { struct gendisk *bi_disk; u8 bi_partno; + int __bi_remaining; unsigned long bi_flags; struct bvec_iter bi_iter; + bio_end_io_t *bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload *bi_integrity; +#endif }; static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) @@ -30,6 +35,11 @@ static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) bd->bi_partno = bio->bi_partno; bd->bi_flags = bio->bi_flags; bd->bi_iter = bio->bi_iter; + bd->__bi_remaining = atomic_read(&bio->__bi_remaining); + bd->bi_end_io = bio->bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + bd->bi_integrity = bio_integrity(bio); +#endif } static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) @@ -38,6 +48,11 @@ static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) bio->bi_partno = bd->bi_partno; bio->bi_flags = bd->bi_flags; bio->bi_iter = bd->bi_iter; + atomic_set(&bio->__bi_remaining, bd->__bi_remaining); + bio->bi_end_io = bd->bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + bio->bi_integrity = bd->bi_integrity; +#endif } #endif diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 2d32821b3a5b..d3bb355819a4 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2846,8 +2846,8 @@ static void cache_postsuspend(struct dm_target *ti) prevent_background_work(cache); BUG_ON(atomic_read(&cache->nr_io_migrations)); - cancel_delayed_work(&cache->waker); - flush_workqueue(cache->wq); + cancel_delayed_work_sync(&cache->waker); + drain_workqueue(cache->wq); WARN_ON(cache->tracker.in_flight); /* @@ -3492,7 +3492,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {2, 1, 0}, + .version = {2, 2, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index b225b3e445fa..2f03fecd312d 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -6,6 +6,8 @@ * This file is released under the GPL. */ +#include "dm-bio-record.h" + #include <linux/compiler.h> #include <linux/module.h> #include <linux/device-mapper.h> @@ -201,17 +203,19 @@ struct dm_integrity_c { __u8 log2_blocks_per_bitmap_bit; unsigned char mode; - int suspending; int failed; struct crypto_shash *internal_hash; + struct dm_target *ti; + /* these variables are locked with endio_wait.lock */ struct rb_root in_progress; struct list_head wait_list; wait_queue_head_t endio_wait; struct workqueue_struct *wait_wq; + struct workqueue_struct *offload_wq; unsigned char commit_seq; commit_id_t commit_ids[N_COMMIT_IDS]; @@ -293,11 +297,7 @@ struct dm_integrity_io { struct completion *completion; - struct gendisk *orig_bi_disk; - u8 orig_bi_partno; - bio_end_io_t *orig_bi_end_io; - struct bio_integrity_payload *orig_bi_integrity; - struct bvec_iter orig_bi_iter; + struct dm_bio_details bio_details; }; struct journal_completion { @@ -1439,7 +1439,7 @@ static void dec_in_flight(struct dm_integrity_io *dio) dio->range.logical_sector += dio->range.n_sectors; bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT); INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->wait_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } do_endio_flush(ic, dio); @@ -1450,14 +1450,9 @@ static void integrity_end_io(struct bio *bio) { struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); - bio->bi_iter = dio->orig_bi_iter; - bio->bi_disk = dio->orig_bi_disk; - bio->bi_partno = dio->orig_bi_partno; - if (dio->orig_bi_integrity) { - bio->bi_integrity = dio->orig_bi_integrity; + dm_bio_restore(&dio->bio_details, bio); + if (bio->bi_integrity) bio->bi_opf |= REQ_INTEGRITY; - } - bio->bi_end_io = dio->orig_bi_end_io; if (dio->completion) complete(dio->completion); @@ -1542,7 +1537,7 @@ static void integrity_metadata(struct work_struct *w) } } - __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) { + __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { unsigned pos; char *mem, *checksums_ptr; @@ -1586,7 +1581,7 @@ again: if (likely(checksums != checksums_onstack)) kfree(checksums); } else { - struct bio_integrity_payload *bip = dio->orig_bi_integrity; + struct bio_integrity_payload *bip = dio->bio_details.bi_integrity; if (bip) { struct bio_vec biv; @@ -1865,7 +1860,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map if (need_sync_io && from_map) { INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->metadata_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } @@ -2005,20 +2000,13 @@ offload_to_thread: } else dio->completion = NULL; - dio->orig_bi_iter = bio->bi_iter; - - dio->orig_bi_disk = bio->bi_disk; - dio->orig_bi_partno = bio->bi_partno; + dm_bio_record(&dio->bio_details, bio); bio_set_dev(bio, ic->dev->bdev); - - dio->orig_bi_integrity = bio_integrity(bio); bio->bi_integrity = NULL; bio->bi_opf &= ~REQ_INTEGRITY; - - dio->orig_bi_end_io = bio->bi_end_io; bio->bi_end_io = integrity_end_io; - bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT; + generic_make_request(bio); if (need_sync_io) { @@ -2315,7 +2303,7 @@ static void integrity_writer(struct work_struct *w) unsigned prev_free_sectors; /* the following test is not needed, but it tests the replay code */ - if (READ_ONCE(ic->suspending) && !ic->meta_dev) + if (unlikely(dm_suspended(ic->ti)) && !ic->meta_dev) return; spin_lock_irq(&ic->endio_wait.lock); @@ -2376,7 +2364,7 @@ static void integrity_recalc(struct work_struct *w) next_chunk: - if (unlikely(READ_ONCE(ic->suspending))) + if (unlikely(dm_suspended(ic->ti))) goto unlock_ret; range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); @@ -2501,7 +2489,7 @@ static void bitmap_block_work(struct work_struct *w) dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) { remove_range(ic, &dio->range); INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->wait_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); } else { block_bitmap_op(ic, ic->journal, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_SET); @@ -2524,7 +2512,7 @@ static void bitmap_block_work(struct work_struct *w) remove_range(ic, &dio->range); INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->wait_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); } queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval); @@ -2804,8 +2792,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti) del_timer_sync(&ic->autocommit_timer); - WRITE_ONCE(ic->suspending, 1); - if (ic->recalc_wq) drain_workqueue(ic->recalc_wq); @@ -2834,8 +2820,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti) #endif } - WRITE_ONCE(ic->suspending, 0); - BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); ic->journal_uptodate = true; @@ -2888,17 +2872,24 @@ static void dm_integrity_resume(struct dm_target *ti) } else { replay_journal(ic); if (ic->mode == 'B') { - int mode; ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP); ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); - mode = ic->recalculate_flag ? BITMAP_OP_SET : BITMAP_OP_CLEAR; - block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, mode); - block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, mode); - block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, mode); + block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); + block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); + block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && + le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) { + block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector), + ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); + block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector), + ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); + block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector), + ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); + } rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); } @@ -2967,7 +2958,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, DMEMIT(" meta_device:%s", ic->meta_dev->name); if (ic->sectors_per_block != 1) DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); - if (ic->recalculate_flag) + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) DMEMIT(" recalculate"); DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); @@ -3623,6 +3614,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } ti->private = ic; ti->per_io_data_size = sizeof(struct dm_integrity_io); + ic->ti = ti; ic->in_progress = RB_ROOT; INIT_LIST_HEAD(&ic->wait_list); @@ -3836,6 +3828,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } + ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM, + METADATA_WORKQUEUE_MAX_ACTIVE); + if (!ic->offload_wq) { + ti->error = "Cannot allocate workqueue"; + r = -ENOMEM; + goto bad; + } + ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1); if (!ic->commit_wq) { ti->error = "Cannot allocate workqueue"; @@ -4140,6 +4140,8 @@ static void dm_integrity_dtr(struct dm_target *ti) destroy_workqueue(ic->metadata_wq); if (ic->wait_wq) destroy_workqueue(ic->wait_wq); + if (ic->offload_wq) + destroy_workqueue(ic->offload_wq); if (ic->commit_wq) destroy_workqueue(ic->commit_wq); if (ic->writer_wq) @@ -4200,7 +4202,7 @@ static void dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 2bc18c9c3abc..58fd137b6ae1 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -2053,7 +2053,7 @@ static int multipath_busy(struct dm_target *ti) *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 13, 0}, + .version = {1, 14, 0}, .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE | DM_TARGET_PASSES_INTEGRITY, .module = THIS_MODULE, diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index fc9947d6210c..76b6b323bf4b 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -960,9 +960,9 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) DMWARN("%s: __commit_transaction() failed, error = %d", __func__, r); } + pmd_write_unlock(pmd); if (!pmd->fail_io) __destroy_persistent_data_objects(pmd); - pmd_write_unlock(pmd); kfree(pmd); return 0; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 0d61e9c67986..eec9f252e935 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -1221,7 +1221,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index b9e27e37a943..a09bdc000e64 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -625,6 +625,12 @@ static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry wc->freelist_size++; } +static inline void writecache_verify_watermark(struct dm_writecache *wc) +{ + if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark)) + queue_work(wc->writeback_wq, &wc->writeback_work); +} + static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector) { struct wc_entry *e; @@ -650,8 +656,8 @@ static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, s list_del(&e->lru); } wc->freelist_size--; - if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark)) - queue_work(wc->writeback_wq, &wc->writeback_work); + + writecache_verify_watermark(wc); return e; } @@ -842,7 +848,7 @@ static void writecache_suspend(struct dm_target *ti) } wc_unlock(wc); - flush_workqueue(wc->writeback_wq); + drain_workqueue(wc->writeback_wq); wc_lock(wc); if (flush_on_suspend) @@ -965,6 +971,8 @@ erase_this: writecache_commit_flushed(wc, false); } + writecache_verify_watermark(wc); + wc_unlock(wc); } @@ -2312,7 +2320,7 @@ static void writecache_status(struct dm_target *ti, status_type_t type, static struct target_type writecache_target = { .name = "writecache", - .version = {1, 1, 1}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = writecache_ctr, .dtr = writecache_dtr, diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 70a1063161c0..f4f83d39b3dc 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -533,8 +533,9 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) /* Get the BIO chunk work. If one is not active yet, create one */ cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk); - if (!cw) { - + if (cw) { + dmz_get_chunk_work(cw); + } else { /* Create a new chunk work */ cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO); if (unlikely(!cw)) { @@ -543,7 +544,7 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) } INIT_WORK(&cw->work, dmz_chunk_work); - refcount_set(&cw->refcount, 0); + refcount_set(&cw->refcount, 1); cw->target = dmz; cw->chunk = chunk; bio_list_init(&cw->bio_list); @@ -556,7 +557,6 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) } bio_list_add(&cw->bio_list, bio); - dmz_get_chunk_work(cw); dmz_reclaim_bio_acc(dmz->reclaim); if (queue_work(dmz->chunk_wq, &cw->work)) @@ -967,7 +967,7 @@ static int dmz_iterate_devices(struct dm_target *ti, static struct target_type dmz_type = { .name = "zoned", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .features = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM, .module = THIS_MODULE, .ctr = dmz_ctr, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b89f07ee2eff..0413018c8305 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1788,7 +1788,8 @@ static int dm_any_congested(void *congested_data, int bdi_bits) * With request-based DM we only need to check the * top-level queue for congestion. */ - r = md->queue->backing_dev_info->wb.state & bdi_bits; + struct backing_dev_info *bdi = md->queue->backing_dev_info; + r = bdi->wb.congested->state & bdi_bits; } else { map = dm_get_live_table_fast(md); if (map) @@ -1854,15 +1855,6 @@ static const struct dax_operations dm_dax_ops; static void dm_wq_work(struct work_struct *work); -static void dm_init_normal_md_queue(struct mapped_device *md) -{ - /* - * Initialize aspects of queue that aren't relevant for blk-mq - */ - md->queue->backing_dev_info->congested_data = md; - md->queue->backing_dev_info->congested_fn = dm_any_congested; -} - static void cleanup_mapped_device(struct mapped_device *md) { if (md->wq) @@ -2249,6 +2241,12 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_get_queue_limits); +static void dm_init_congested_fn(struct mapped_device *md) +{ + md->queue->backing_dev_info->congested_data = md; + md->queue->backing_dev_info->congested_fn = dm_any_congested; +} + /* * Setup the DM device's queue based on md's type */ @@ -2265,11 +2263,12 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) DMERR("Cannot initialize queue for request-based dm-mq mapped device"); return r; } + dm_init_congested_fn(md); break; case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: case DM_TYPE_NVME_BIO_BASED: - dm_init_normal_md_queue(md); + dm_init_congested_fn(md); break; case DM_TYPE_NONE: WARN_ON_ONCE(true); @@ -2368,6 +2367,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait) map = dm_get_live_table(md, &srcu_idx); if (!dm_suspended_md(md)) { dm_table_presuspend_targets(map); + set_bit(DMF_SUSPENDED, &md->flags); dm_table_postsuspend_targets(map); } /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index 7c429ce98bae..668770e9f609 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -639,9 +639,9 @@ int media_get_pad_index(struct media_entity *entity, bool is_sink, return -EINVAL; for (i = 0; i < entity->num_pads; i++) { - if (entity->pads[i].flags == MEDIA_PAD_FL_SINK) + if (entity->pads[i].flags & MEDIA_PAD_FL_SINK) pad_is_sink = true; - else if (entity->pads[i].flags == MEDIA_PAD_FL_SOURCE) + else if (entity->pads[i].flags & MEDIA_PAD_FL_SOURCE) pad_is_sink = false; else continue; /* This is an error! */ diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.c b/drivers/media/platform/vicodec/codec-v4l2-fwht.c index 3c93d9232c3c..b6e39fbd8ad5 100644 --- a/drivers/media/platform/vicodec/codec-v4l2-fwht.c +++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.c @@ -27,17 +27,17 @@ static const struct v4l2_fwht_pixfmt_info v4l2_fwht_pixfmts[] = { { V4L2_PIX_FMT_BGR24, 3, 3, 1, 3, 3, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_RGB24, 3, 3, 1, 3, 3, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_HSV24, 3, 3, 1, 3, 3, 1, 1, 3, 1, FWHT_FL_PIXENC_HSV}, - { V4L2_PIX_FMT_BGR32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_XBGR32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_BGR32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_XBGR32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_ABGR32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_RGB32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_XRGB32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_RGB32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_XRGB32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_ARGB32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_BGRX32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_BGRX32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_BGRA32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_RGBX32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_RGBX32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_RGBA32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_HSV32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_HSV}, + { V4L2_PIX_FMT_HSV32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_HSV}, { V4L2_PIX_FMT_GREY, 1, 1, 1, 1, 0, 1, 1, 1, 1, FWHT_FL_PIXENC_RGB}, }; @@ -175,22 +175,14 @@ static int prepare_raw_frame(struct fwht_raw_frame *rf, case V4L2_PIX_FMT_RGB32: case V4L2_PIX_FMT_XRGB32: case V4L2_PIX_FMT_HSV32: - rf->cr = rf->luma + 1; - rf->cb = rf->cr + 2; - rf->luma += 2; - break; - case V4L2_PIX_FMT_BGR32: - case V4L2_PIX_FMT_XBGR32: - rf->cb = rf->luma; - rf->cr = rf->cb + 2; - rf->luma++; - break; case V4L2_PIX_FMT_ARGB32: rf->alpha = rf->luma; rf->cr = rf->luma + 1; rf->cb = rf->cr + 2; rf->luma += 2; break; + case V4L2_PIX_FMT_BGR32: + case V4L2_PIX_FMT_XBGR32: case V4L2_PIX_FMT_ABGR32: rf->cb = rf->luma; rf->cr = rf->cb + 2; @@ -198,10 +190,6 @@ static int prepare_raw_frame(struct fwht_raw_frame *rf, rf->alpha = rf->cr + 1; break; case V4L2_PIX_FMT_BGRX32: - rf->cb = rf->luma + 1; - rf->cr = rf->cb + 2; - rf->luma += 2; - break; case V4L2_PIX_FMT_BGRA32: rf->alpha = rf->luma; rf->cb = rf->luma + 1; @@ -209,10 +197,6 @@ static int prepare_raw_frame(struct fwht_raw_frame *rf, rf->luma += 2; break; case V4L2_PIX_FMT_RGBX32: - rf->cr = rf->luma; - rf->cb = rf->cr + 2; - rf->luma++; - break; case V4L2_PIX_FMT_RGBA32: rf->alpha = rf->luma + 3; rf->cr = rf->luma; diff --git a/drivers/media/usb/pulse8-cec/pulse8-cec.c b/drivers/media/usb/pulse8-cec/pulse8-cec.c index afda438d4e0a..0655aa9ecf28 100644 --- a/drivers/media/usb/pulse8-cec/pulse8-cec.c +++ b/drivers/media/usb/pulse8-cec/pulse8-cec.c @@ -635,8 +635,6 @@ static void pulse8_cec_adap_free(struct cec_adapter *adap) cancel_delayed_work_sync(&pulse8->ping_eeprom_work); cancel_work_sync(&pulse8->irq_work); cancel_work_sync(&pulse8->tx_work); - serio_close(pulse8->serio); - serio_set_drvdata(pulse8->serio, NULL); kfree(pulse8); } @@ -652,6 +650,9 @@ static void pulse8_disconnect(struct serio *serio) struct pulse8 *pulse8 = serio_get_drvdata(serio); cec_unregister_adapter(pulse8->adap); + pulse8->serio = NULL; + serio_set_drvdata(serio, NULL); + serio_close(serio); } static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio, @@ -840,6 +841,8 @@ static int pulse8_connect(struct serio *serio, struct serio_driver *drv) serio_set_drvdata(serio, pulse8); INIT_WORK(&pulse8->irq_work, pulse8_irq_work_handler); INIT_WORK(&pulse8->tx_work, pulse8_tx_work_handler); + INIT_DELAYED_WORK(&pulse8->ping_eeprom_work, + pulse8_ping_eeprom_work_handler); mutex_init(&pulse8->lock); spin_lock_init(&pulse8->msg_lock); pulse8->config_pending = false; @@ -865,17 +868,16 @@ static int pulse8_connect(struct serio *serio, struct serio_driver *drv) pulse8->restoring_config = true; } - INIT_DELAYED_WORK(&pulse8->ping_eeprom_work, - pulse8_ping_eeprom_work_handler); schedule_delayed_work(&pulse8->ping_eeprom_work, PING_PERIOD); return 0; close_serio: + pulse8->serio = NULL; + serio_set_drvdata(serio, NULL); serio_close(serio); delete_adap: cec_delete_adapter(pulse8->adap); - serio_set_drvdata(serio, NULL); free_device: kfree(pulse8); return err; diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c index 1afd9c6ad908..cc34c5ab7009 100644 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -880,12 +880,12 @@ int v4l2_m2m_register_media_controller(struct v4l2_m2m_dev *m2m_dev, goto err_rel_entity1; /* Connect the three entities */ - ret = media_create_pad_link(m2m_dev->source, 0, &m2m_dev->proc, 1, + ret = media_create_pad_link(m2m_dev->source, 0, &m2m_dev->proc, 0, MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED); if (ret) goto err_rel_entity2; - ret = media_create_pad_link(&m2m_dev->proc, 0, &m2m_dev->sink, 0, + ret = media_create_pad_link(&m2m_dev->proc, 1, &m2m_dev->sink, 0, MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED); if (ret) goto err_rm_links0; diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c index 25e5f24b3fec..5bdf57472314 100644 --- a/drivers/misc/altera-stapl/altera.c +++ b/drivers/misc/altera-stapl/altera.c @@ -2112,8 +2112,8 @@ exit_done: return status; } -static int altera_get_note(u8 *p, s32 program_size, - s32 *offset, char *key, char *value, int length) +static int altera_get_note(u8 *p, s32 program_size, s32 *offset, + char *key, char *value, int keylen, int vallen) /* * Gets key and value of NOTE fields in the JBC file. * Can be called in two modes: if offset pointer is NULL, @@ -2170,7 +2170,7 @@ static int altera_get_note(u8 *p, s32 program_size, &p[note_table + (8 * i) + 4])]; if (value != NULL) - strlcpy(value, value_ptr, length); + strlcpy(value, value_ptr, vallen); } } @@ -2189,13 +2189,13 @@ static int altera_get_note(u8 *p, s32 program_size, strlcpy(key, &p[note_strings + get_unaligned_be32( &p[note_table + (8 * i)])], - length); + keylen); if (value != NULL) strlcpy(value, &p[note_strings + get_unaligned_be32( &p[note_table + (8 * i) + 4])], - length); + vallen); *offset = i + 1; } @@ -2449,7 +2449,7 @@ int altera_init(struct altera_config *config, const struct firmware *fw) __func__, (format_version == 2) ? "Jam STAPL" : "pre-standardized Jam 1.1"); while (altera_get_note((u8 *)fw->data, fw->size, - &offset, key, value, 256) == 0) + &offset, key, value, 32, 256) == 0) printk(KERN_INFO "%s: NOTE \"%s\" = \"%s\"\n", __func__, key, value); } diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index c3a160c18047..3955fa5db43c 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -1590,7 +1590,7 @@ static u32 sdhci_msm_cqe_irq(struct sdhci_host *host, u32 intmask) return 0; } -void sdhci_msm_cqe_disable(struct mmc_host *mmc, bool recovery) +static void sdhci_msm_cqe_disable(struct mmc_host *mmc, bool recovery) { struct sdhci_host *host = mmc_priv(mmc); unsigned long flags; diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 5eea8d70a85d..ce15a05f23d4 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -262,10 +262,26 @@ static int gl9750_execute_tuning(struct sdhci_host *host, u32 opcode) return 0; } +static void gli_pcie_enable_msi(struct sdhci_pci_slot *slot) +{ + int ret; + + ret = pci_alloc_irq_vectors(slot->chip->pdev, 1, 1, + PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (ret < 0) { + pr_warn("%s: enable PCI MSI failed, error=%d\n", + mmc_hostname(slot->host->mmc), ret); + return; + } + + slot->host->irq = pci_irq_vector(slot->chip->pdev, 0); +} + static int gli_probe_slot_gl9750(struct sdhci_pci_slot *slot) { struct sdhci_host *host = slot->host; + gli_pcie_enable_msi(slot); slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO; sdhci_enable_v4_mode(host); @@ -276,6 +292,7 @@ static int gli_probe_slot_gl9755(struct sdhci_pci_slot *slot) { struct sdhci_host *host = slot->host; + gli_pcie_enable_msi(slot); slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO; sdhci_enable_v4_mode(host); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 1cc2cd894f87..c81698550e5a 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -50,11 +50,6 @@ struct arp_pkt { }; #pragma pack() -static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) -{ - return (struct arp_pkt *)skb_network_header(skb); -} - /* Forward declaration */ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], bool strict_match); @@ -553,10 +548,11 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) spin_unlock(&bond->mode_lock); } -static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) +static struct slave *rlb_choose_channel(struct sk_buff *skb, + struct bonding *bond, + const struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct arp_pkt *arp = arp_pkt(skb); struct slave *assigned_slave, *curr_active_slave; struct rlb_client_info *client_info; u32 hash_index = 0; @@ -653,8 +649,12 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon */ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) { - struct arp_pkt *arp = arp_pkt(skb); struct slave *tx_slave = NULL; + struct arp_pkt *arp; + + if (!pskb_network_may_pull(skb, sizeof(*arp))) + return NULL; + arp = (struct arp_pkt *)skb_network_header(skb); /* Don't modify or load balance ARPs that do not originate locally * (e.g.,arrive via a bridge). @@ -664,7 +664,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) if (arp->op_code == htons(ARPOP_REPLY)) { /* the arp must be sent on the selected rx channel */ - tx_slave = rlb_choose_channel(skb, bond); + tx_slave = rlb_choose_channel(skb, bond, arp); if (tx_slave) bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr, tx_slave->dev->addr_len); @@ -676,7 +676,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) * When the arp reply is received the entry will be updated * with the correct unicast address of the client. */ - tx_slave = rlb_choose_channel(skb, bond); + tx_slave = rlb_choose_channel(skb, bond, arp); /* The ARP reply packets must be delayed so that * they can cancel out the influence of the ARP request. diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 6ee06a49fb4c..68834a2853c9 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -883,6 +883,7 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { = { .len = sizeof(struct can_bittiming) }, [IFLA_CAN_DATA_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, + [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, }; static int can_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 483db9d133c3..221593261e8f 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -397,32 +397,35 @@ static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip) mv88e6xxx_reg_unlock(chip); } -int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, int link, - int speed, int duplex, int pause, - phy_interface_t mode) +static int mv88e6xxx_port_config_interface(struct mv88e6xxx_chip *chip, + int port, phy_interface_t interface) { - struct phylink_link_state state; int err; - if (!chip->info->ops->port_set_link) - return 0; + if (chip->info->ops->port_set_rgmii_delay) { + err = chip->info->ops->port_set_rgmii_delay(chip, port, + interface); + if (err && err != -EOPNOTSUPP) + return err; + } - if (!chip->info->ops->port_link_state) - return 0; + if (chip->info->ops->port_set_cmode) { + err = chip->info->ops->port_set_cmode(chip, port, + interface); + if (err && err != -EOPNOTSUPP) + return err; + } - err = chip->info->ops->port_link_state(chip, port, &state); - if (err) - return err; + return 0; +} - /* Has anything actually changed? We don't expect the - * interface mode to change without one of the other - * parameters also changing - */ - if (state.link == link && - state.speed == speed && - state.duplex == duplex && - (state.interface == mode || - state.interface == PHY_INTERFACE_MODE_NA)) +static int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, + int link, int speed, int duplex, int pause, + phy_interface_t mode) +{ + int err; + + if (!chip->info->ops->port_set_link) return 0; /* Port's MAC control must not be changed unless the link is down */ @@ -430,8 +433,9 @@ int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, int link, if (err) return err; - if (chip->info->ops->port_set_speed) { - err = chip->info->ops->port_set_speed(chip, port, speed); + if (chip->info->ops->port_set_speed_duplex) { + err = chip->info->ops->port_set_speed_duplex(chip, port, + speed, duplex); if (err && err != -EOPNOTSUPP) goto restore_link; } @@ -445,25 +449,7 @@ int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, int link, goto restore_link; } - if (chip->info->ops->port_set_duplex) { - err = chip->info->ops->port_set_duplex(chip, port, duplex); - if (err && err != -EOPNOTSUPP) - goto restore_link; - } - - if (chip->info->ops->port_set_rgmii_delay) { - err = chip->info->ops->port_set_rgmii_delay(chip, port, mode); - if (err && err != -EOPNOTSUPP) - goto restore_link; - } - - if (chip->info->ops->port_set_cmode) { - err = chip->info->ops->port_set_cmode(chip, port, mode); - if (err && err != -EOPNOTSUPP) - goto restore_link; - } - - err = 0; + err = mv88e6xxx_port_config_interface(chip, port, mode); restore_link: if (chip->info->ops->port_set_link(chip, port, link)) dev_err(chip->dev, "p%d: failed to restore MAC's link\n", port); @@ -478,6 +464,97 @@ static int mv88e6xxx_phy_is_internal(struct dsa_switch *ds, int port) return port < chip->info->num_internal_phys; } +static int mv88e6xxx_port_ppu_updates(struct mv88e6xxx_chip *chip, int port) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®); + if (err) { + dev_err(chip->dev, + "p%d: %s: failed to read port status\n", + port, __func__); + return err; + } + + return !!(reg & MV88E6XXX_PORT_STS_PHY_DETECT); +} + +static int mv88e6xxx_serdes_pcs_get_state(struct dsa_switch *ds, int port, + struct phylink_link_state *state) +{ + struct mv88e6xxx_chip *chip = ds->priv; + u8 lane; + int err; + + mv88e6xxx_reg_lock(chip); + lane = mv88e6xxx_serdes_get_lane(chip, port); + if (lane && chip->info->ops->serdes_pcs_get_state) + err = chip->info->ops->serdes_pcs_get_state(chip, port, lane, + state); + else + err = -EOPNOTSUPP; + mv88e6xxx_reg_unlock(chip); + + return err; +} + +static int mv88e6xxx_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port, + unsigned int mode, + phy_interface_t interface, + const unsigned long *advertise) +{ + const struct mv88e6xxx_ops *ops = chip->info->ops; + u8 lane; + + if (ops->serdes_pcs_config) { + lane = mv88e6xxx_serdes_get_lane(chip, port); + if (lane) + return ops->serdes_pcs_config(chip, port, lane, mode, + interface, advertise); + } + + return 0; +} + +static void mv88e6xxx_serdes_pcs_an_restart(struct dsa_switch *ds, int port) +{ + struct mv88e6xxx_chip *chip = ds->priv; + const struct mv88e6xxx_ops *ops; + int err = 0; + u8 lane; + + ops = chip->info->ops; + + if (ops->serdes_pcs_an_restart) { + mv88e6xxx_reg_lock(chip); + lane = mv88e6xxx_serdes_get_lane(chip, port); + if (lane) + err = ops->serdes_pcs_an_restart(chip, port, lane); + mv88e6xxx_reg_unlock(chip); + + if (err) + dev_err(ds->dev, "p%d: failed to restart AN\n", port); + } +} + +static int mv88e6xxx_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port, + unsigned int mode, + int speed, int duplex) +{ + const struct mv88e6xxx_ops *ops = chip->info->ops; + u8 lane; + + if (!phylink_autoneg_inband(mode) && ops->serdes_pcs_link_up) { + lane = mv88e6xxx_serdes_get_lane(chip, port); + if (lane) + return ops->serdes_pcs_link_up(chip, port, lane, + speed, duplex); + } + + return 0; +} + static void mv88e6065_phylink_validate(struct mv88e6xxx_chip *chip, int port, unsigned long *mask, struct phylink_link_state *state) @@ -582,54 +659,43 @@ static void mv88e6xxx_validate(struct dsa_switch *ds, int port, phylink_helper_basex_speed(state); } -static int mv88e6xxx_link_state(struct dsa_switch *ds, int port, - struct phylink_link_state *state) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int err; - - mv88e6xxx_reg_lock(chip); - if (chip->info->ops->port_link_state) - err = chip->info->ops->port_link_state(chip, port, state); - else - err = -EOPNOTSUPP; - mv88e6xxx_reg_unlock(chip); - - return err; -} - static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state) { struct mv88e6xxx_chip *chip = ds->priv; - int speed, duplex, link, pause, err; + int err; + /* FIXME: is this the correct test? If we're in fixed mode on an + * internal port, why should we process this any different from + * PHY mode? On the other hand, the port may be automedia between + * an internal PHY and the serdes... + */ if ((mode == MLO_AN_PHY) && mv88e6xxx_phy_is_internal(ds, port)) return; - if (mode == MLO_AN_FIXED) { - link = LINK_FORCED_UP; - speed = state->speed; - duplex = state->duplex; - } else if (!mv88e6xxx_phy_is_internal(ds, port)) { - link = state->link; - speed = state->speed; - duplex = state->duplex; - } else { - speed = SPEED_UNFORCED; - duplex = DUPLEX_UNFORCED; - link = LINK_UNFORCED; - } - pause = !!phylink_test(state->advertising, Pause); - mv88e6xxx_reg_lock(chip); - err = mv88e6xxx_port_setup_mac(chip, port, link, speed, duplex, pause, - state->interface); + /* FIXME: should we force the link down here - but if we do, how + * do we restore the link force/unforce state? The driver layering + * gets in the way. + */ + err = mv88e6xxx_port_config_interface(chip, port, state->interface); + if (err && err != -EOPNOTSUPP) + goto err_unlock; + + err = mv88e6xxx_serdes_pcs_config(chip, port, mode, state->interface, + state->advertising); + /* FIXME: we should restart negotiation if something changed - which + * is something we get if we convert to using phylinks PCS operations. + */ + if (err > 0) + err = 0; + +err_unlock: mv88e6xxx_reg_unlock(chip); if (err && err != -EOPNOTSUPP) - dev_err(ds->dev, "p%d: failed to configure MAC\n", port); + dev_err(ds->dev, "p%d: failed to configure MAC/PCS\n", port); } static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port, @@ -642,20 +708,14 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port, ops = chip->info->ops; - /* Internal PHYs propagate their configuration directly to the MAC. - * External PHYs depend on whether the PPU is enabled for this port. - * FIXME: we should be using the PPU enable state here. What about - * an automedia port? - */ - if (!mv88e6xxx_phy_is_internal(ds, port) && ops->port_set_link) { - mv88e6xxx_reg_lock(chip); + mv88e6xxx_reg_lock(chip); + if (!mv88e6xxx_port_ppu_updates(chip, port) && ops->port_set_link) err = ops->port_set_link(chip, port, LINK_FORCED_DOWN); - mv88e6xxx_reg_unlock(chip); + mv88e6xxx_reg_unlock(chip); - if (err) - dev_err(chip->dev, - "p%d: failed to force MAC link down\n", port); - } + if (err) + dev_err(chip->dev, + "p%d: failed to force MAC link down\n", port); } static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port, @@ -670,40 +730,35 @@ static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port, ops = chip->info->ops; - /* Internal PHYs propagate their configuration directly to the MAC. - * External PHYs depend on whether the PPU is enabled for this port. - * FIXME: we should be using the PPU enable state here. What about - * an automedia port? - */ - if (!mv88e6xxx_phy_is_internal(ds, port)) { - mv88e6xxx_reg_lock(chip); + mv88e6xxx_reg_lock(chip); + if (!mv88e6xxx_port_ppu_updates(chip, port)) { /* FIXME: for an automedia port, should we force the link * down here - what if the link comes up due to "other" media * while we're bringing the port up, how is the exclusivity - * handled in the Marvell hardware? E.g. port 4 on 88E6532 + * handled in the Marvell hardware? E.g. port 2 on 88E6390 * shared between internal PHY and Serdes. */ - if (ops->port_set_speed) { - err = ops->port_set_speed(chip, port, speed); - if (err && err != -EOPNOTSUPP) - goto error; - } + err = mv88e6xxx_serdes_pcs_link_up(chip, port, mode, speed, + duplex); + if (err) + goto error; - if (ops->port_set_duplex) { - err = ops->port_set_duplex(chip, port, duplex); + if (ops->port_set_speed_duplex) { + err = ops->port_set_speed_duplex(chip, port, + speed, duplex); if (err && err != -EOPNOTSUPP) goto error; } if (ops->port_set_link) err = ops->port_set_link(chip, port, LINK_FORCED_UP); + } error: - mv88e6xxx_reg_unlock(chip); + mv88e6xxx_reg_unlock(chip); - if (err && err != -EOPNOTSUPP) - dev_err(ds->dev, - "p%d: failed to configure MAC link up\n", port); - } + if (err && err != -EOPNOTSUPP) + dev_err(ds->dev, + "p%d: failed to configure MAC link up\n", port); } static int mv88e6xxx_stats_snapshot(struct mv88e6xxx_chip *chip, int port) @@ -2830,6 +2885,8 @@ static u64 mv88e6xxx_devlink_atu_bin_get(struct mv88e6xxx_chip *chip, goto unlock; } + occupancy &= MV88E6XXX_G2_ATU_STATS_MASK; + unlock: mv88e6xxx_reg_unlock(chip); @@ -3331,8 +3388,7 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .phy_read = mv88e6185_phy_ppu_read, .phy_write = mv88e6185_phy_ppu_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -3341,7 +3397,6 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6185_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, @@ -3371,12 +3426,10 @@ static const struct mv88e6xxx_ops mv88e6095_ops = { .phy_read = mv88e6185_phy_ppu_read, .phy_write = mv88e6185_phy_ppu_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_set_frame_mode = mv88e6085_port_set_frame_mode, .port_set_egress_floods = mv88e6185_port_set_egress_floods, .port_set_upstream_port = mv88e6095_port_set_upstream_port, - .port_link_state = mv88e6185_port_link_state, .port_get_cmode = mv88e6185_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, @@ -3402,8 +3455,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -3413,7 +3465,6 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6185_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, @@ -3442,13 +3493,11 @@ static const struct mv88e6xxx_ops mv88e6123_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_set_frame_mode = mv88e6085_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6185_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -3477,8 +3526,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = { .phy_read = mv88e6185_phy_ppu_read, .phy_write = mv88e6185_phy_ppu_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6185_port_set_egress_floods, @@ -3488,7 +3536,6 @@ static const struct mv88e6xxx_ops mv88e6131_ops = { .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_limit = mv88e6097_port_pause_limit, .port_set_pause = mv88e6185_port_set_pause, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6185_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, @@ -3520,9 +3567,8 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6341_port_set_speed, + .port_set_speed_duplex = mv88e6341_port_set_speed_duplex, .port_max_speed_mode = mv88e6341_port_max_speed_mode, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, @@ -3533,7 +3579,6 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_set_cmode = mv88e6341_port_set_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, @@ -3552,6 +3597,11 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, .serdes_get_lane = mv88e6341_serdes_get_lane, + /* Check status register pause & lpa register */ + .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6390_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up, .serdes_irq_mapping = mv88e6390_serdes_irq_mapping, .serdes_irq_enable = mv88e6390_serdes_irq_enable, .serdes_irq_status = mv88e6390_serdes_irq_status, @@ -3568,8 +3618,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -3579,7 +3628,6 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6185_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, @@ -3611,11 +3659,9 @@ static const struct mv88e6xxx_ops mv88e6165_ops = { .phy_read = mv88e6165_phy_read, .phy_write = mv88e6165_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6185_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, @@ -3647,9 +3693,8 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -3659,7 +3704,6 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -3691,9 +3735,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, - .port_set_speed = mv88e6352_port_set_speed, + .port_set_speed_duplex = mv88e6352_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_policy = mv88e6352_port_set_policy, .port_set_frame_mode = mv88e6351_port_set_frame_mode, @@ -3704,7 +3747,6 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -3724,6 +3766,10 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_get_lane = mv88e6352_serdes_get_lane, + .serdes_pcs_get_state = mv88e6352_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6352_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6352_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6352_serdes_pcs_link_up, .serdes_power = mv88e6352_serdes_power, .serdes_get_regs_len = mv88e6352_serdes_get_regs_len, .serdes_get_regs = mv88e6352_serdes_get_regs, @@ -3740,9 +3786,8 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -3752,7 +3797,6 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -3784,9 +3828,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, - .port_set_speed = mv88e6352_port_set_speed, + .port_set_speed_duplex = mv88e6352_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_policy = mv88e6352_port_set_policy, .port_set_frame_mode = mv88e6351_port_set_frame_mode, @@ -3797,7 +3840,6 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -3817,6 +3859,10 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_get_lane = mv88e6352_serdes_get_lane, + .serdes_pcs_get_state = mv88e6352_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6352_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6352_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6352_serdes_pcs_link_up, .serdes_power = mv88e6352_serdes_power, .serdes_irq_mapping = mv88e6352_serdes_irq_mapping, .serdes_irq_enable = mv88e6352_serdes_irq_enable, @@ -3835,14 +3881,12 @@ static const struct mv88e6xxx_ops mv88e6185_ops = { .phy_read = mv88e6185_phy_ppu_read, .phy_write = mv88e6185_phy_ppu_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_set_frame_mode = mv88e6085_port_set_frame_mode, .port_set_egress_floods = mv88e6185_port_set_egress_floods, .port_egress_rate_limiting = mv88e6095_port_egress_rate_limiting, .port_set_upstream_port = mv88e6095_port_set_upstream_port, .port_set_pause = mv88e6185_port_set_pause, - .port_link_state = mv88e6185_port_link_state, .port_get_cmode = mv88e6185_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, @@ -3873,9 +3917,8 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed_duplex = mv88e6390_port_set_speed_duplex, .port_max_speed_mode = mv88e6390_port_max_speed_mode, .port_tag_remap = mv88e6390_port_tag_remap, .port_set_policy = mv88e6352_port_set_policy, @@ -3885,7 +3928,6 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .port_pause_limit = mv88e6390_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_set_cmode = mv88e6390_port_set_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, @@ -3907,6 +3949,11 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, .serdes_get_lane = mv88e6390_serdes_get_lane, + /* Check status register pause & lpa register */ + .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6390_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up, .serdes_irq_mapping = mv88e6390_serdes_irq_mapping, .serdes_irq_enable = mv88e6390_serdes_irq_enable, .serdes_irq_status = mv88e6390_serdes_irq_status, @@ -3929,9 +3976,8 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390x_port_set_speed, + .port_set_speed_duplex = mv88e6390x_port_set_speed_duplex, .port_max_speed_mode = mv88e6390x_port_max_speed_mode, .port_tag_remap = mv88e6390_port_tag_remap, .port_set_policy = mv88e6352_port_set_policy, @@ -3941,7 +3987,6 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .port_pause_limit = mv88e6390_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_set_cmode = mv88e6390x_port_set_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, @@ -3963,6 +4008,11 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, .serdes_get_lane = mv88e6390x_serdes_get_lane, + /* Check status register pause & lpa register */ + .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6390_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up, .serdes_irq_mapping = mv88e6390_serdes_irq_mapping, .serdes_irq_enable = mv88e6390_serdes_irq_enable, .serdes_irq_status = mv88e6390_serdes_irq_status, @@ -3985,9 +4035,8 @@ static const struct mv88e6xxx_ops mv88e6191_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed_duplex = mv88e6390_port_set_speed_duplex, .port_max_speed_mode = mv88e6390_port_max_speed_mode, .port_tag_remap = mv88e6390_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, @@ -3996,7 +4045,6 @@ static const struct mv88e6xxx_ops mv88e6191_ops = { .port_pause_limit = mv88e6390_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_set_cmode = mv88e6390_port_set_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, @@ -4018,6 +4066,11 @@ static const struct mv88e6xxx_ops mv88e6191_ops = { .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, .serdes_get_lane = mv88e6390_serdes_get_lane, + /* Check status register pause & lpa register */ + .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6390_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up, .serdes_irq_mapping = mv88e6390_serdes_irq_mapping, .serdes_irq_enable = mv88e6390_serdes_irq_enable, .serdes_irq_status = mv88e6390_serdes_irq_status, @@ -4042,9 +4095,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, - .port_set_speed = mv88e6352_port_set_speed, + .port_set_speed_duplex = mv88e6352_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_policy = mv88e6352_port_set_policy, .port_set_frame_mode = mv88e6351_port_set_frame_mode, @@ -4055,7 +4107,6 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -4075,6 +4126,10 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_get_lane = mv88e6352_serdes_get_lane, + .serdes_pcs_get_state = mv88e6352_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6352_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6352_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6352_serdes_pcs_link_up, .serdes_power = mv88e6352_serdes_power, .serdes_irq_mapping = mv88e6352_serdes_irq_mapping, .serdes_irq_enable = mv88e6352_serdes_irq_enable, @@ -4098,9 +4153,8 @@ static const struct mv88e6xxx_ops mv88e6250_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, - .port_set_speed = mv88e6250_port_set_speed, + .port_set_speed_duplex = mv88e6250_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -4108,7 +4162,6 @@ static const struct mv88e6xxx_ops mv88e6250_ops = { .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6250_port_link_state, .stats_snapshot = mv88e6320_g1_stats_snapshot, .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6250_stats_get_sset_count, @@ -4137,9 +4190,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed_duplex = mv88e6390_port_set_speed_duplex, .port_max_speed_mode = mv88e6390_port_max_speed_mode, .port_tag_remap = mv88e6390_port_tag_remap, .port_set_policy = mv88e6352_port_set_policy, @@ -4149,7 +4201,6 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .port_pause_limit = mv88e6390_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_set_cmode = mv88e6390_port_set_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, @@ -4171,6 +4222,11 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, .serdes_get_lane = mv88e6390_serdes_get_lane, + /* Check status register pause & lpa register */ + .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6390_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up, .serdes_irq_mapping = mv88e6390_serdes_irq_mapping, .serdes_irq_enable = mv88e6390_serdes_irq_enable, .serdes_irq_status = mv88e6390_serdes_irq_status, @@ -4196,8 +4252,7 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -4207,7 +4262,6 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -4240,8 +4294,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -4251,7 +4304,6 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -4282,9 +4334,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6341_port_set_speed, + .port_set_speed_duplex = mv88e6341_port_set_speed_duplex, .port_max_speed_mode = mv88e6341_port_max_speed_mode, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, @@ -4295,7 +4346,6 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_set_cmode = mv88e6341_port_set_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, @@ -4314,6 +4364,11 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, .serdes_get_lane = mv88e6341_serdes_get_lane, + /* Check status register pause & lpa register */ + .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6390_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up, .serdes_irq_mapping = mv88e6390_serdes_irq_mapping, .serdes_irq_enable = mv88e6390_serdes_irq_enable, .serdes_irq_status = mv88e6390_serdes_irq_status, @@ -4332,9 +4387,8 @@ static const struct mv88e6xxx_ops mv88e6350_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -4344,7 +4398,6 @@ static const struct mv88e6xxx_ops mv88e6350_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -4374,9 +4427,8 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, - .port_set_speed = mv88e6185_port_set_speed, + .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -4386,7 +4438,6 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -4420,9 +4471,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, - .port_set_speed = mv88e6352_port_set_speed, + .port_set_speed_duplex = mv88e6352_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_policy = mv88e6352_port_set_policy, .port_set_frame_mode = mv88e6351_port_set_frame_mode, @@ -4433,7 +4483,6 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -4453,6 +4502,10 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_get_lane = mv88e6352_serdes_get_lane, + .serdes_pcs_get_state = mv88e6352_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6352_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6352_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6352_serdes_pcs_link_up, .serdes_power = mv88e6352_serdes_power, .serdes_irq_mapping = mv88e6352_serdes_irq_mapping, .serdes_irq_enable = mv88e6352_serdes_irq_enable, @@ -4478,9 +4531,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed_duplex = mv88e6390_port_set_speed_duplex, .port_max_speed_mode = mv88e6390_port_max_speed_mode, .port_tag_remap = mv88e6390_port_tag_remap, .port_set_policy = mv88e6352_port_set_policy, @@ -4492,7 +4544,6 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .port_pause_limit = mv88e6390_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_set_cmode = mv88e6390_port_set_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, @@ -4514,6 +4565,11 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, .serdes_get_lane = mv88e6390_serdes_get_lane, + /* Check status register pause & lpa register */ + .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6390_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up, .serdes_irq_mapping = mv88e6390_serdes_irq_mapping, .serdes_irq_enable = mv88e6390_serdes_irq_enable, .serdes_irq_status = mv88e6390_serdes_irq_status, @@ -4538,9 +4594,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, .port_set_link = mv88e6xxx_port_set_link, - .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390x_port_set_speed, + .port_set_speed_duplex = mv88e6390x_port_set_speed_duplex, .port_max_speed_mode = mv88e6390x_port_max_speed_mode, .port_tag_remap = mv88e6390_port_tag_remap, .port_set_policy = mv88e6352_port_set_policy, @@ -4552,7 +4607,6 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .port_pause_limit = mv88e6390_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, - .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6352_port_get_cmode, .port_set_cmode = mv88e6390x_port_set_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, @@ -4574,6 +4628,10 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, .serdes_get_lane = mv88e6390x_serdes_get_lane, + .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state, + .serdes_pcs_config = mv88e6390_serdes_pcs_config, + .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart, + .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up, .serdes_irq_mapping = mv88e6390_serdes_irq_mapping, .serdes_irq_enable = mv88e6390_serdes_irq_enable, .serdes_irq_status = mv88e6390_serdes_irq_status, @@ -5452,8 +5510,9 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .setup = mv88e6xxx_setup, .teardown = mv88e6xxx_teardown, .phylink_validate = mv88e6xxx_validate, - .phylink_mac_link_state = mv88e6xxx_link_state, + .phylink_mac_link_state = mv88e6xxx_serdes_pcs_get_state, .phylink_mac_config = mv88e6xxx_mac_config, + .phylink_mac_an_restart = mv88e6xxx_serdes_pcs_an_restart, .phylink_mac_link_down = mv88e6xxx_mac_link_down, .phylink_mac_link_up = mv88e6xxx_mac_link_up, .get_strings = mv88e6xxx_get_strings, diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 851686b45414..e5430cf2ad71 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -399,15 +399,6 @@ struct mv88e6xxx_ops { */ int (*port_set_link)(struct mv88e6xxx_chip *chip, int port, int link); -#define DUPLEX_UNFORCED -2 - - /* Port's MAC duplex mode - * - * Use DUPLEX_HALF or DUPLEX_FULL to force half or full duplex, - * or DUPLEX_UNFORCED for normal duplex detection. - */ - int (*port_set_duplex)(struct mv88e6xxx_chip *chip, int port, int dup); - #define PAUSE_ON 1 #define PAUSE_OFF 0 @@ -417,13 +408,18 @@ struct mv88e6xxx_ops { #define SPEED_MAX INT_MAX #define SPEED_UNFORCED -2 +#define DUPLEX_UNFORCED -2 - /* Port's MAC speed (in Mbps) + /* Port's MAC speed (in Mbps) and MAC duplex mode * * Depending on the chip, 10, 100, 200, 1000, 2500, 10000 are valid. * Use SPEED_UNFORCED for normal detection, SPEED_MAX for max value. + * + * Use DUPLEX_HALF or DUPLEX_FULL to force half or full duplex, + * or DUPLEX_UNFORCED for normal duplex detection. */ - int (*port_set_speed)(struct mv88e6xxx_chip *chip, int port, int speed); + int (*port_set_speed_duplex)(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex); /* What interface mode should be used for maximum speed? */ phy_interface_t (*port_max_speed_mode)(int port); @@ -462,9 +458,6 @@ struct mv88e6xxx_ops { */ int (*port_set_upstream_port)(struct mv88e6xxx_chip *chip, int port, int upstream_port); - /* Return the port link state, as required by phylink */ - int (*port_link_state)(struct mv88e6xxx_chip *chip, int port, - struct phylink_link_state *state); /* Snapshot the statistics for a port. The statistics can then * be read back a leisure but still with a consistent view. @@ -502,6 +495,17 @@ struct mv88e6xxx_ops { /* SERDES lane mapping */ u8 (*serdes_get_lane)(struct mv88e6xxx_chip *chip, int port); + int (*serdes_pcs_get_state)(struct mv88e6xxx_chip *chip, int port, + u8 lane, struct phylink_link_state *state); + int (*serdes_pcs_config)(struct mv88e6xxx_chip *chip, int port, + u8 lane, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertise); + int (*serdes_pcs_an_restart)(struct mv88e6xxx_chip *chip, int port, + u8 lane); + int (*serdes_pcs_link_up)(struct mv88e6xxx_chip *chip, int port, + u8 lane, int speed, int duplex); + /* SERDES interrupt handling */ unsigned int (*serdes_irq_mapping)(struct mv88e6xxx_chip *chip, int port); @@ -669,9 +673,6 @@ int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask, u16 val); int mv88e6xxx_wait_bit(struct mv88e6xxx_chip *chip, int addr, int reg, int bit, int val); -int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, int link, - int speed, int duplex, int pause, - phy_interface_t mode); struct mii_bus *mv88e6xxx_default_mdio_bus(struct mv88e6xxx_chip *chip); static inline void mv88e6xxx_reg_lock(struct mv88e6xxx_chip *chip) diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 01503014b1ee..8fd483020c5b 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -1099,6 +1099,13 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip) { int err, irq, virq; + chip->g2_irq.masked = ~0; + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_g2_int_mask(chip, ~chip->g2_irq.masked); + mv88e6xxx_reg_unlock(chip); + if (err) + return err; + chip->g2_irq.domain = irq_domain_add_simple( chip->dev->of_node, 16, 0, &mv88e6xxx_g2_irq_domain_ops, chip); if (!chip->g2_irq.domain) @@ -1108,7 +1115,6 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip) irq_create_mapping(chip->g2_irq.domain, irq); chip->g2_irq.chip = mv88e6xxx_g2_irq_chip; - chip->g2_irq.masked = ~0; chip->device_irq = irq_find_mapping(chip->g1_irq.domain, MV88E6XXX_G1_STS_IRQ_DEVICE); diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 0b43c650e100..8128dc607cf4 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -162,46 +162,9 @@ int mv88e6xxx_port_set_link(struct mv88e6xxx_chip *chip, int port, int link) return 0; } -int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup) -{ - u16 reg; - int err; - - err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_MAC_CTL, ®); - if (err) - return err; - - reg &= ~(MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX | - MV88E6XXX_PORT_MAC_CTL_DUPLEX_FULL); - - switch (dup) { - case DUPLEX_HALF: - reg |= MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX; - break; - case DUPLEX_FULL: - reg |= MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX | - MV88E6XXX_PORT_MAC_CTL_DUPLEX_FULL; - break; - case DUPLEX_UNFORCED: - /* normal duplex detection */ - break; - default: - return -EOPNOTSUPP; - } - - err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_MAC_CTL, reg); - if (err) - return err; - - dev_dbg(chip->dev, "p%d: %s %s duplex\n", port, - reg & MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX ? "Force" : "Unforce", - reg & MV88E6XXX_PORT_MAC_CTL_DUPLEX_FULL ? "full" : "half"); - - return 0; -} - -static int mv88e6xxx_port_set_speed(struct mv88e6xxx_chip *chip, int port, - int speed, bool alt_bit, bool force_bit) +static int mv88e6xxx_port_set_speed_duplex(struct mv88e6xxx_chip *chip, + int port, int speed, bool alt_bit, + bool force_bit, int duplex) { u16 reg, ctrl; int err; @@ -239,11 +202,29 @@ static int mv88e6xxx_port_set_speed(struct mv88e6xxx_chip *chip, int port, return -EOPNOTSUPP; } + switch (duplex) { + case DUPLEX_HALF: + ctrl |= MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX; + break; + case DUPLEX_FULL: + ctrl |= MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX | + MV88E6XXX_PORT_MAC_CTL_DUPLEX_FULL; + break; + case DUPLEX_UNFORCED: + /* normal duplex detection */ + break; + default: + return -EOPNOTSUPP; + } + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_MAC_CTL, ®); if (err) return err; - reg &= ~MV88E6XXX_PORT_MAC_CTL_SPEED_MASK; + reg &= ~(MV88E6XXX_PORT_MAC_CTL_SPEED_MASK | + MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX | + MV88E6XXX_PORT_MAC_CTL_DUPLEX_FULL); + if (alt_bit) reg &= ~MV88E6390_PORT_MAC_CTL_ALTSPEED; if (force_bit) { @@ -261,12 +242,16 @@ static int mv88e6xxx_port_set_speed(struct mv88e6xxx_chip *chip, int port, dev_dbg(chip->dev, "p%d: Speed set to %d Mbps\n", port, speed); else dev_dbg(chip->dev, "p%d: Speed unforced\n", port); + dev_dbg(chip->dev, "p%d: %s %s duplex\n", port, + reg & MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX ? "Force" : "Unforce", + reg & MV88E6XXX_PORT_MAC_CTL_DUPLEX_FULL ? "full" : "half"); return 0; } /* Support 10, 100, 200 Mbps (e.g. 88E6065 family) */ -int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +int mv88e6065_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex) { if (speed == SPEED_MAX) speed = 200; @@ -275,11 +260,13 @@ int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) return -EOPNOTSUPP; /* Setting 200 Mbps on port 0 to 3 selects 100 Mbps */ - return mv88e6xxx_port_set_speed(chip, port, speed, false, false); + return mv88e6xxx_port_set_speed_duplex(chip, port, speed, false, false, + duplex); } /* Support 10, 100, 1000 Mbps (e.g. 88E6185 family) */ -int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +int mv88e6185_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex) { if (speed == SPEED_MAX) speed = 1000; @@ -287,11 +274,13 @@ int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) if (speed == 200 || speed > 1000) return -EOPNOTSUPP; - return mv88e6xxx_port_set_speed(chip, port, speed, false, false); + return mv88e6xxx_port_set_speed_duplex(chip, port, speed, false, false, + duplex); } /* Support 10, 100 Mbps (e.g. 88E6250 family) */ -int mv88e6250_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +int mv88e6250_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex) { if (speed == SPEED_MAX) speed = 100; @@ -299,11 +288,13 @@ int mv88e6250_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) if (speed > 100) return -EOPNOTSUPP; - return mv88e6xxx_port_set_speed(chip, port, speed, false, false); + return mv88e6xxx_port_set_speed_duplex(chip, port, speed, false, false, + duplex); } /* Support 10, 100, 200, 1000, 2500 Mbps (e.g. 88E6341) */ -int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +int mv88e6341_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex) { if (speed == SPEED_MAX) speed = port < 5 ? 1000 : 2500; @@ -317,7 +308,8 @@ int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) if (speed == 2500 && port < 5) return -EOPNOTSUPP; - return mv88e6xxx_port_set_speed(chip, port, speed, !port, true); + return mv88e6xxx_port_set_speed_duplex(chip, port, speed, !port, true, + duplex); } phy_interface_t mv88e6341_port_max_speed_mode(int port) @@ -329,7 +321,8 @@ phy_interface_t mv88e6341_port_max_speed_mode(int port) } /* Support 10, 100, 200, 1000 Mbps (e.g. 88E6352 family) */ -int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +int mv88e6352_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex) { if (speed == SPEED_MAX) speed = 1000; @@ -340,11 +333,13 @@ int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) if (speed == 200 && port < 5) return -EOPNOTSUPP; - return mv88e6xxx_port_set_speed(chip, port, speed, true, false); + return mv88e6xxx_port_set_speed_duplex(chip, port, speed, true, false, + duplex); } /* Support 10, 100, 200, 1000, 2500 Mbps (e.g. 88E6390) */ -int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +int mv88e6390_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex) { if (speed == SPEED_MAX) speed = port < 9 ? 1000 : 2500; @@ -358,7 +353,8 @@ int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) if (speed == 2500 && port < 9) return -EOPNOTSUPP; - return mv88e6xxx_port_set_speed(chip, port, speed, true, true); + return mv88e6xxx_port_set_speed_duplex(chip, port, speed, true, true, + duplex); } phy_interface_t mv88e6390_port_max_speed_mode(int port) @@ -370,7 +366,8 @@ phy_interface_t mv88e6390_port_max_speed_mode(int port) } /* Support 10, 100, 200, 1000, 2500, 10000 Mbps (e.g. 88E6190X) */ -int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +int mv88e6390x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex) { if (speed == SPEED_MAX) speed = port < 9 ? 1000 : 10000; @@ -381,7 +378,8 @@ int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) if (speed >= 2500 && port < 9) return -EOPNOTSUPP; - return mv88e6xxx_port_set_speed(chip, port, speed, true, true); + return mv88e6xxx_port_set_speed_duplex(chip, port, speed, true, true, + duplex); } phy_interface_t mv88e6390x_port_max_speed_mode(int port) @@ -586,183 +584,6 @@ int mv88e6352_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode) return 0; } -int mv88e6250_port_link_state(struct mv88e6xxx_chip *chip, int port, - struct phylink_link_state *state) -{ - int err; - u16 reg; - - err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®); - if (err) - return err; - - if (port < 5) { - switch (reg & MV88E6250_PORT_STS_PORTMODE_MASK) { - case MV88E6250_PORT_STS_PORTMODE_PHY_10_HALF: - state->speed = SPEED_10; - state->duplex = DUPLEX_HALF; - break; - case MV88E6250_PORT_STS_PORTMODE_PHY_100_HALF: - state->speed = SPEED_100; - state->duplex = DUPLEX_HALF; - break; - case MV88E6250_PORT_STS_PORTMODE_PHY_10_FULL: - state->speed = SPEED_10; - state->duplex = DUPLEX_FULL; - break; - case MV88E6250_PORT_STS_PORTMODE_PHY_100_FULL: - state->speed = SPEED_100; - state->duplex = DUPLEX_FULL; - break; - default: - state->speed = SPEED_UNKNOWN; - state->duplex = DUPLEX_UNKNOWN; - break; - } - } else { - switch (reg & MV88E6250_PORT_STS_PORTMODE_MASK) { - case MV88E6250_PORT_STS_PORTMODE_MII_10_HALF: - state->speed = SPEED_10; - state->duplex = DUPLEX_HALF; - break; - case MV88E6250_PORT_STS_PORTMODE_MII_100_HALF: - state->speed = SPEED_100; - state->duplex = DUPLEX_HALF; - break; - case MV88E6250_PORT_STS_PORTMODE_MII_10_FULL: - state->speed = SPEED_10; - state->duplex = DUPLEX_FULL; - break; - case MV88E6250_PORT_STS_PORTMODE_MII_100_FULL: - state->speed = SPEED_100; - state->duplex = DUPLEX_FULL; - break; - default: - state->speed = SPEED_UNKNOWN; - state->duplex = DUPLEX_UNKNOWN; - break; - } - } - - state->link = !!(reg & MV88E6250_PORT_STS_LINK); - state->an_enabled = 1; - state->an_complete = state->link; - state->interface = PHY_INTERFACE_MODE_NA; - - return 0; -} - -int mv88e6352_port_link_state(struct mv88e6xxx_chip *chip, int port, - struct phylink_link_state *state) -{ - int err; - u16 reg; - - switch (chip->ports[port].cmode) { - case MV88E6XXX_PORT_STS_CMODE_RGMII: - err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_MAC_CTL, - ®); - if (err) - return err; - - if ((reg & MV88E6XXX_PORT_MAC_CTL_RGMII_DELAY_RXCLK) && - (reg & MV88E6XXX_PORT_MAC_CTL_RGMII_DELAY_TXCLK)) - state->interface = PHY_INTERFACE_MODE_RGMII_ID; - else if (reg & MV88E6XXX_PORT_MAC_CTL_RGMII_DELAY_RXCLK) - state->interface = PHY_INTERFACE_MODE_RGMII_RXID; - else if (reg & MV88E6XXX_PORT_MAC_CTL_RGMII_DELAY_TXCLK) - state->interface = PHY_INTERFACE_MODE_RGMII_TXID; - else - state->interface = PHY_INTERFACE_MODE_RGMII; - break; - case MV88E6XXX_PORT_STS_CMODE_1000BASEX: - state->interface = PHY_INTERFACE_MODE_1000BASEX; - break; - case MV88E6XXX_PORT_STS_CMODE_SGMII: - state->interface = PHY_INTERFACE_MODE_SGMII; - break; - case MV88E6XXX_PORT_STS_CMODE_2500BASEX: - state->interface = PHY_INTERFACE_MODE_2500BASEX; - break; - case MV88E6XXX_PORT_STS_CMODE_XAUI: - state->interface = PHY_INTERFACE_MODE_XAUI; - break; - case MV88E6XXX_PORT_STS_CMODE_RXAUI: - state->interface = PHY_INTERFACE_MODE_RXAUI; - break; - default: - /* we do not support other cmode values here */ - state->interface = PHY_INTERFACE_MODE_NA; - } - - err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®); - if (err) - return err; - - switch (reg & MV88E6XXX_PORT_STS_SPEED_MASK) { - case MV88E6XXX_PORT_STS_SPEED_10: - state->speed = SPEED_10; - break; - case MV88E6XXX_PORT_STS_SPEED_100: - state->speed = SPEED_100; - break; - case MV88E6XXX_PORT_STS_SPEED_1000: - state->speed = SPEED_1000; - break; - case MV88E6XXX_PORT_STS_SPEED_10000: - if ((reg & MV88E6XXX_PORT_STS_CMODE_MASK) == - MV88E6XXX_PORT_STS_CMODE_2500BASEX) - state->speed = SPEED_2500; - else - state->speed = SPEED_10000; - break; - } - - state->duplex = reg & MV88E6XXX_PORT_STS_DUPLEX ? - DUPLEX_FULL : DUPLEX_HALF; - state->link = !!(reg & MV88E6XXX_PORT_STS_LINK); - state->an_enabled = 1; - state->an_complete = state->link; - - return 0; -} - -int mv88e6185_port_link_state(struct mv88e6xxx_chip *chip, int port, - struct phylink_link_state *state) -{ - if (state->interface == PHY_INTERFACE_MODE_1000BASEX) { - u8 cmode = chip->ports[port].cmode; - - /* When a port is in "Cross-chip serdes" mode, it uses - * 1000Base-X full duplex mode, but there is no automatic - * link detection. Use the sync OK status for link (as it - * would do for 1000Base-X mode.) - */ - if (cmode == MV88E6185_PORT_STS_CMODE_SERDES) { - u16 mac; - int err; - - err = mv88e6xxx_port_read(chip, port, - MV88E6XXX_PORT_MAC_CTL, &mac); - if (err) - return err; - - state->link = !!(mac & MV88E6185_PORT_MAC_CTL_SYNC_OK); - state->an_enabled = 1; - state->an_complete = - !!(mac & MV88E6185_PORT_MAC_CTL_AN_DONE); - state->duplex = - state->link ? DUPLEX_FULL : DUPLEX_UNKNOWN; - state->speed = - state->link ? SPEED_1000 : SPEED_UNKNOWN; - - return 0; - } - } - - return mv88e6352_port_link_state(chip, port, state); -} - /* Offset 0x02: Jamming Control * * Do not limit the period of time that this port can be paused for by diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index 0ec4327c2b42..44d76ac973f6 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -298,15 +298,20 @@ int mv88e6390_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, int mv88e6xxx_port_set_link(struct mv88e6xxx_chip *chip, int port, int link); -int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup); - -int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); -int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); -int mv88e6250_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); -int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); -int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); -int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); -int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6065_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex); +int mv88e6185_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex); +int mv88e6250_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex); +int mv88e6341_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex); +int mv88e6352_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex); +int mv88e6390_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex); +int mv88e6390x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, + int speed, int duplex); phy_interface_t mv88e6341_port_max_speed_mode(int port); phy_interface_t mv88e6390_port_max_speed_mode(int port); @@ -359,12 +364,6 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, phy_interface_t mode); int mv88e6185_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode); int mv88e6352_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode); -int mv88e6185_port_link_state(struct mv88e6xxx_chip *chip, int port, - struct phylink_link_state *state); -int mv88e6250_port_link_state(struct mv88e6xxx_chip *chip, int port, - struct phylink_link_state *state); -int mv88e6352_port_link_state(struct mv88e6xxx_chip *chip, int port, - struct phylink_link_state *state); int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port); int mv88e6095_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port, int upstream_port); diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 238219787233..2098f19b534d 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -49,6 +49,52 @@ static int mv88e6390_serdes_write(struct mv88e6xxx_chip *chip, return mv88e6xxx_phy_write(chip, lane, reg_c45, val); } +static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, + u16 status, u16 lpa, + struct phylink_link_state *state) +{ + if (status & MV88E6390_SGMII_PHY_STATUS_SPD_DPL_VALID) { + state->link = !!(status & MV88E6390_SGMII_PHY_STATUS_LINK); + state->duplex = status & + MV88E6390_SGMII_PHY_STATUS_DUPLEX_FULL ? + DUPLEX_FULL : DUPLEX_HALF; + + if (status & MV88E6390_SGMII_PHY_STATUS_TX_PAUSE) + state->pause |= MLO_PAUSE_TX; + if (status & MV88E6390_SGMII_PHY_STATUS_RX_PAUSE) + state->pause |= MLO_PAUSE_RX; + + switch (status & MV88E6390_SGMII_PHY_STATUS_SPEED_MASK) { + case MV88E6390_SGMII_PHY_STATUS_SPEED_1000: + if (state->interface == PHY_INTERFACE_MODE_2500BASEX) + state->speed = SPEED_2500; + else + state->speed = SPEED_1000; + break; + case MV88E6390_SGMII_PHY_STATUS_SPEED_100: + state->speed = SPEED_100; + break; + case MV88E6390_SGMII_PHY_STATUS_SPEED_10: + state->speed = SPEED_10; + break; + default: + dev_err(chip->dev, "invalid PHY speed\n"); + return -EINVAL; + } + } else { + state->link = false; + } + + if (state->interface == PHY_INTERFACE_MODE_2500BASEX) + mii_lpa_mod_linkmode_x(state->lp_advertising, lpa, + ETHTOOL_LINK_MODE_2500baseX_Full_BIT); + else if (state->interface == PHY_INTERFACE_MODE_1000BASEX) + mii_lpa_mod_linkmode_x(state->lp_advertising, lpa, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); + + return 0; +} + int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane, bool up) { @@ -70,6 +116,120 @@ int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane, return err; } +int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port, + u8 lane, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertise) +{ + u16 adv, bmcr, val; + bool changed; + int err; + + switch (interface) { + case PHY_INTERFACE_MODE_SGMII: + adv = 0x0001; + break; + + case PHY_INTERFACE_MODE_1000BASEX: + adv = linkmode_adv_to_mii_adv_x(advertise, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); + break; + + default: + return 0; + } + + err = mv88e6352_serdes_read(chip, MII_ADVERTISE, &val); + if (err) + return err; + + changed = val != adv; + if (changed) { + err = mv88e6352_serdes_write(chip, MII_ADVERTISE, adv); + if (err) + return err; + } + + err = mv88e6352_serdes_read(chip, MII_BMCR, &val); + if (err) + return err; + + if (phylink_autoneg_inband(mode)) + bmcr = val | BMCR_ANENABLE; + else + bmcr = val & ~BMCR_ANENABLE; + + if (bmcr == val) + return changed; + + return mv88e6352_serdes_write(chip, MII_BMCR, bmcr); +} + +int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port, + u8 lane, struct phylink_link_state *state) +{ + u16 lpa, status; + int err; + + err = mv88e6352_serdes_read(chip, 0x11, &status); + if (err) { + dev_err(chip->dev, "can't read Serdes PHY status: %d\n", err); + return err; + } + + err = mv88e6352_serdes_read(chip, MII_LPA, &lpa); + if (err) { + dev_err(chip->dev, "can't read Serdes PHY LPA: %d\n", err); + return err; + } + + return mv88e6xxx_serdes_pcs_get_state(chip, status, lpa, state); +} + +int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port, + u8 lane) +{ + u16 bmcr; + int err; + + err = mv88e6352_serdes_read(chip, MII_BMCR, &bmcr); + if (err) + return err; + + return mv88e6352_serdes_write(chip, MII_BMCR, bmcr | BMCR_ANRESTART); +} + +int mv88e6352_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port, + u8 lane, int speed, int duplex) +{ + u16 val, bmcr; + int err; + + err = mv88e6352_serdes_read(chip, MII_BMCR, &val); + if (err) + return err; + + bmcr = val & ~(BMCR_SPEED100 | BMCR_FULLDPLX | BMCR_SPEED1000); + switch (speed) { + case SPEED_1000: + bmcr |= BMCR_SPEED1000; + break; + case SPEED_100: + bmcr |= BMCR_SPEED100; + break; + case SPEED_10: + break; + } + + if (duplex == DUPLEX_FULL) + bmcr |= BMCR_FULLDPLX; + + if (bmcr == val) + return 0; + + return mv88e6352_serdes_write(chip, MII_BMCR, bmcr); +} + u8 mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port) { u8 cmode = chip->ports[port].cmode; @@ -180,26 +340,17 @@ int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, static void mv88e6352_serdes_irq_link(struct mv88e6xxx_chip *chip, int port) { - struct dsa_switch *ds = chip->ds; - u16 status; - bool up; + u16 bmsr; int err; - err = mv88e6352_serdes_read(chip, MII_BMSR, &status); - if (err) - return; - - /* Status must be read twice in order to give the current link - * status. Otherwise the change in link status since the last - * read of the register is returned. - */ - err = mv88e6352_serdes_read(chip, MII_BMSR, &status); - if (err) + /* If the link has dropped, we want to know about it. */ + err = mv88e6352_serdes_read(chip, MII_BMSR, &bmsr); + if (err) { + dev_err(chip->dev, "can't read Serdes BMSR: %d\n", err); return; + } - up = status & BMSR_LSTATUS; - - dsa_port_phylink_mac_change(ds, port, up); + dsa_port_phylink_mac_change(chip->ds, port, !!(bmsr & BMSR_LSTATUS)); } irqreturn_t mv88e6352_serdes_irq_status(struct mv88e6xxx_chip *chip, int port, @@ -410,20 +561,18 @@ static int mv88e6390_serdes_power_sgmii(struct mv88e6xxx_chip *chip, u8 lane, int err; err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, - MV88E6390_SGMII_CONTROL, &val); + MV88E6390_SGMII_BMCR, &val); if (err) return err; if (up) - new_val = val & ~(MV88E6390_SGMII_CONTROL_RESET | - MV88E6390_SGMII_CONTROL_LOOPBACK | - MV88E6390_SGMII_CONTROL_PDOWN); + new_val = val & ~(BMCR_RESET | BMCR_LOOPBACK | BMCR_PDOWN); else - new_val = val | MV88E6390_SGMII_CONTROL_PDOWN; + new_val = val | BMCR_PDOWN; if (val != new_val) err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, - MV88E6390_SGMII_CONTROL, new_val); + MV88E6390_SGMII_BMCR, new_val); return err; } @@ -540,71 +689,153 @@ int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane, return err; } -static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip, - int port, u8 lane) +int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port, + u8 lane, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertise) { - u8 cmode = chip->ports[port].cmode; - struct dsa_switch *ds = chip->ds; - int duplex = DUPLEX_UNKNOWN; - int speed = SPEED_UNKNOWN; - phy_interface_t mode; - int link, err; - u16 status; + u16 val, bmcr, adv; + bool changed; + int err; + + switch (interface) { + case PHY_INTERFACE_MODE_SGMII: + adv = 0x0001; + break; + + case PHY_INTERFACE_MODE_1000BASEX: + adv = linkmode_adv_to_mii_adv_x(advertise, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); + break; + + case PHY_INTERFACE_MODE_2500BASEX: + adv = linkmode_adv_to_mii_adv_x(advertise, + ETHTOOL_LINK_MODE_2500baseX_Full_BIT); + break; + + default: + return 0; + } + + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_ADVERTISE, &val); + if (err) + return err; + + changed = val != adv; + if (changed) { + err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_ADVERTISE, adv); + if (err) + return err; + } + + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_BMCR, &val); + if (err) + return err; + + if (phylink_autoneg_inband(mode)) + bmcr = val | BMCR_ANENABLE; + else + bmcr = val & ~BMCR_ANENABLE; + + /* setting ANENABLE triggers a restart of negotiation */ + if (bmcr == val) + return changed; + + return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_BMCR, bmcr); +} + +int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port, + u8 lane, struct phylink_link_state *state) +{ + u16 lpa, status; + int err; err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, MV88E6390_SGMII_PHY_STATUS, &status); if (err) { - dev_err(chip->dev, "can't read SGMII PHY status: %d\n", err); - return; + dev_err(chip->dev, "can't read Serdes PHY status: %d\n", err); + return err; } - link = status & MV88E6390_SGMII_PHY_STATUS_LINK ? - LINK_FORCED_UP : LINK_FORCED_DOWN; + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_LPA, &lpa); + if (err) { + dev_err(chip->dev, "can't read Serdes PHY LPA: %d\n", err); + return err; + } - if (status & MV88E6390_SGMII_PHY_STATUS_SPD_DPL_VALID) { - duplex = status & MV88E6390_SGMII_PHY_STATUS_DUPLEX_FULL ? - DUPLEX_FULL : DUPLEX_HALF; + return mv88e6xxx_serdes_pcs_get_state(chip, status, lpa, state); +} - switch (status & MV88E6390_SGMII_PHY_STATUS_SPEED_MASK) { - case MV88E6390_SGMII_PHY_STATUS_SPEED_1000: - if (cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX) - speed = SPEED_2500; - else - speed = SPEED_1000; - break; - case MV88E6390_SGMII_PHY_STATUS_SPEED_100: - speed = SPEED_100; - break; - case MV88E6390_SGMII_PHY_STATUS_SPEED_10: - speed = SPEED_10; - break; - default: - dev_err(chip->dev, "invalid PHY speed\n"); - return; - } - } +int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port, + u8 lane) +{ + u16 bmcr; + int err; - switch (cmode) { - case MV88E6XXX_PORT_STS_CMODE_SGMII: - mode = PHY_INTERFACE_MODE_SGMII; + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_BMCR, &bmcr); + if (err) + return err; + + return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_BMCR, + bmcr | BMCR_ANRESTART); +} + +int mv88e6390_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port, + u8 lane, int speed, int duplex) +{ + u16 val, bmcr; + int err; + + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_BMCR, &val); + if (err) + return err; + + bmcr = val & ~(BMCR_SPEED100 | BMCR_FULLDPLX | BMCR_SPEED1000); + switch (speed) { + case SPEED_2500: + case SPEED_1000: + bmcr |= BMCR_SPEED1000; break; - case MV88E6XXX_PORT_STS_CMODE_1000BASEX: - mode = PHY_INTERFACE_MODE_1000BASEX; + case SPEED_100: + bmcr |= BMCR_SPEED100; break; - case MV88E6XXX_PORT_STS_CMODE_2500BASEX: - mode = PHY_INTERFACE_MODE_2500BASEX; + case SPEED_10: break; - default: - mode = PHY_INTERFACE_MODE_NA; } - err = mv88e6xxx_port_setup_mac(chip, port, link, speed, duplex, - PAUSE_OFF, mode); - if (err) - dev_err(chip->dev, "can't propagate PHY settings to MAC: %d\n", - err); - else - dsa_port_phylink_mac_change(ds, port, link == LINK_FORCED_UP); + if (duplex == DUPLEX_FULL) + bmcr |= BMCR_FULLDPLX; + + if (bmcr == val) + return 0; + + return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_BMCR, bmcr); +} + +static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip, + int port, u8 lane) +{ + u16 bmsr; + int err; + + /* If the link has dropped, we want to know about it. */ + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_BMSR, &bmsr); + if (err) { + dev_err(chip->dev, "can't read Serdes BMSR: %d\n", err); + return; + } + + dsa_port_phylink_mac_change(chip->ds, port, !!(bmsr & BMSR_LSTATUS)); } static int mv88e6390_serdes_irq_enable_sgmii(struct mv88e6xxx_chip *chip, diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h index 1906b3ab29c6..7990cadba4c2 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.h +++ b/drivers/net/dsa/mv88e6xxx/serdes.h @@ -47,14 +47,10 @@ #define MV88E6390_PCS_CONTROL_1_PDOWN BIT(11) /* 1000BASE-X and SGMII */ -#define MV88E6390_SGMII_CONTROL 0x2000 -#define MV88E6390_SGMII_CONTROL_RESET BIT(15) -#define MV88E6390_SGMII_CONTROL_LOOPBACK BIT(14) -#define MV88E6390_SGMII_CONTROL_PDOWN BIT(11) -#define MV88E6390_SGMII_STATUS 0x2001 -#define MV88E6390_SGMII_STATUS_AN_DONE BIT(5) -#define MV88E6390_SGMII_STATUS_REMOTE_FAULT BIT(4) -#define MV88E6390_SGMII_STATUS_LINK BIT(2) +#define MV88E6390_SGMII_BMCR (0x2000 + MII_BMCR) +#define MV88E6390_SGMII_BMSR (0x2000 + MII_BMSR) +#define MV88E6390_SGMII_ADVERTISE (0x2000 + MII_ADVERTISE) +#define MV88E6390_SGMII_LPA (0x2000 + MII_LPA) #define MV88E6390_SGMII_INT_ENABLE 0xa001 #define MV88E6390_SGMII_INT_SPEED_CHANGE BIT(14) #define MV88E6390_SGMII_INT_DUPLEX_CHANGE BIT(13) @@ -73,6 +69,8 @@ #define MV88E6390_SGMII_PHY_STATUS_DUPLEX_FULL BIT(13) #define MV88E6390_SGMII_PHY_STATUS_SPD_DPL_VALID BIT(11) #define MV88E6390_SGMII_PHY_STATUS_LINK BIT(10) +#define MV88E6390_SGMII_PHY_STATUS_TX_PAUSE BIT(3) +#define MV88E6390_SGMII_PHY_STATUS_RX_PAUSE BIT(2) /* Packet generator pad packet checker */ #define MV88E6390_PG_CONTROL 0xf010 @@ -82,6 +80,26 @@ u8 mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port); u8 mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port); u8 mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port); u8 mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port); +int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port, + u8 lane, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertise); +int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port, + u8 lane, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertise); +int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port, + u8 lane, struct phylink_link_state *state); +int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port, + u8 lane, struct phylink_link_state *state); +int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port, + u8 lane); +int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port, + u8 lane); +int mv88e6352_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port, + u8 lane, int speed, int duplex); +int mv88e6390_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port, + u8 lane, int speed, int duplex); unsigned int mv88e6352_serdes_irq_mapping(struct mv88e6xxx_chip *chip, int port); unsigned int mv88e6390_serdes_irq_mapping(struct mv88e6xxx_chip *chip, diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index d801fc204d19..a358fc89a6db 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -36,11 +36,15 @@ struct sja1105_regs { u64 port_control; u64 rgu; u64 config; + u64 sgmii; u64 rmii_pll1; + u64 ptppinst; + u64 ptppindur; u64 ptp_control; u64 ptpclkval; u64 ptpclkrate; u64 ptpclkcorp; + u64 ptpsyncts; u64 ptpschtm; u64 ptpegr_ts[SJA1105_NUM_PORTS]; u64 pad_mii_tx[SJA1105_NUM_PORTS]; @@ -159,6 +163,7 @@ typedef enum { XMII_MODE_MII = 0, XMII_MODE_RMII = 1, XMII_MODE_RGMII = 2, + XMII_MODE_SGMII = 3, } sja1105_phy_interface_t; typedef enum { @@ -212,5 +217,7 @@ size_t sja1105_vlan_lookup_entry_packing(void *buf, void *entry_ptr, enum packing_op op); size_t sja1105pqrs_mac_config_entry_packing(void *buf, void *entry_ptr, enum packing_op op); +size_t sja1105pqrs_avb_params_entry_packing(void *buf, void *entry_ptr, + enum packing_op op); #endif diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c index 9082e52b55e9..0fdc2d55fff6 100644 --- a/drivers/net/dsa/sja1105/sja1105_clocking.c +++ b/drivers/net/dsa/sja1105/sja1105_clocking.c @@ -660,6 +660,10 @@ int sja1105_clocking_setup_port(struct sja1105_private *priv, int port) case XMII_MODE_RGMII: rc = sja1105_rgmii_clocking_setup(priv, port, role); break; + case XMII_MODE_SGMII: + /* Nothing to do in the CGU for SGMII */ + rc = 0; + break; default: dev_err(dev, "Invalid interface mode specified: %d\n", phy_mode); diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c index 25381bd65ed7..bf9b36ff35bf 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c @@ -124,6 +124,9 @@ #define SJA1105ET_SIZE_GENERAL_PARAMS_DYN_CMD \ SJA1105_SIZE_DYN_CMD +#define SJA1105PQRS_SIZE_AVB_PARAMS_DYN_CMD \ + (SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY) + #define SJA1105_MAX_DYN_CMD_SIZE \ SJA1105PQRS_SIZE_MAC_CONFIG_DYN_CMD @@ -481,6 +484,18 @@ sja1105et_general_params_entry_packing(void *buf, void *entry_ptr, return 0; } +static void +sja1105pqrs_avb_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, + enum packing_op op) +{ + u8 *p = buf + SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY; + const int size = SJA1105_SIZE_DYN_CMD; + + sja1105_packing(p, &cmd->valid, 31, 31, size, op); + sja1105_packing(p, &cmd->errors, 30, 30, size, op); + sja1105_packing(p, &cmd->rdwrset, 29, 29, size, op); +} + #define OP_READ BIT(0) #define OP_WRITE BIT(1) #define OP_DEL BIT(2) @@ -610,7 +625,14 @@ struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = { .addr = 0x38, }, [BLK_IDX_L2_FORWARDING_PARAMS] = {0}, - [BLK_IDX_AVB_PARAMS] = {0}, + [BLK_IDX_AVB_PARAMS] = { + .entry_packing = sja1105pqrs_avb_params_entry_packing, + .cmd_packing = sja1105pqrs_avb_params_cmd_packing, + .max_entry_count = SJA1105_MAX_AVB_PARAMS_COUNT, + .access = (OP_READ | OP_WRITE), + .packed_size = SJA1105PQRS_SIZE_AVB_PARAMS_DYN_CMD, + .addr = 0x8003, + }, [BLK_IDX_GENERAL_PARAMS] = { .entry_packing = sja1105et_general_params_entry_packing, .cmd_packing = sja1105et_general_params_cmd_packing, diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 6fe679143216..e0c99bb63cdf 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -22,6 +22,7 @@ #include <linux/if_ether.h> #include <linux/dsa/8021q.h> #include "sja1105.h" +#include "sja1105_sgmii.h" #include "sja1105_tas.h" static void sja1105_hw_reset(struct gpio_desc *gpio, unsigned int pulse_len, @@ -135,6 +136,21 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) return 0; } +static bool sja1105_supports_sgmii(struct sja1105_private *priv, int port) +{ + if (priv->info->part_no != SJA1105R_PART_NO && + priv->info->part_no != SJA1105S_PART_NO) + return false; + + if (port != SJA1105_SGMII_PORT) + return false; + + if (dsa_is_unused_port(priv->ds, port)) + return false; + + return true; +} + static int sja1105_init_mii_settings(struct sja1105_private *priv, struct sja1105_dt_port *ports) { @@ -162,6 +178,9 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv, mii = table->entries; for (i = 0; i < SJA1105_NUM_PORTS; i++) { + if (dsa_is_unused_port(priv->ds, i)) + continue; + switch (ports[i].phy_mode) { case PHY_INTERFACE_MODE_MII: mii->xmii_mode[i] = XMII_MODE_MII; @@ -175,12 +194,24 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv, case PHY_INTERFACE_MODE_RGMII_TXID: mii->xmii_mode[i] = XMII_MODE_RGMII; break; + case PHY_INTERFACE_MODE_SGMII: + if (!sja1105_supports_sgmii(priv, i)) + return -EINVAL; + mii->xmii_mode[i] = XMII_MODE_SGMII; + break; default: dev_err(dev, "Unsupported PHY mode %s!\n", phy_modes(ports[i].phy_mode)); } - mii->phy_mac[i] = ports[i].role; + /* Even though the SerDes port is able to drive SGMII autoneg + * like a PHY would, from the perspective of the XMII tables, + * the SGMII port should always be put in MAC mode. + */ + if (ports[i].phy_mode == PHY_INTERFACE_MODE_SGMII) + mii->phy_mac[i] = XMII_MAC; + else + mii->phy_mac[i] = ports[i].role; } return 0; } @@ -448,6 +479,43 @@ static int sja1105_init_general_params(struct sja1105_private *priv) return 0; } +static int sja1105_init_avb_params(struct sja1105_private *priv) +{ + struct sja1105_avb_params_entry *avb; + struct sja1105_table *table; + + table = &priv->static_config.tables[BLK_IDX_AVB_PARAMS]; + + /* Discard previous AVB Parameters Table */ + if (table->entry_count) { + kfree(table->entries); + table->entry_count = 0; + } + + table->entries = kcalloc(SJA1105_MAX_AVB_PARAMS_COUNT, + table->ops->unpacked_entry_size, GFP_KERNEL); + if (!table->entries) + return -ENOMEM; + + table->entry_count = SJA1105_MAX_AVB_PARAMS_COUNT; + + avb = table->entries; + + /* Configure the MAC addresses for meta frames */ + avb->destmeta = SJA1105_META_DMAC; + avb->srcmeta = SJA1105_META_SMAC; + /* On P/Q/R/S, configure the direction of the PTP_CLK pin as input by + * default. This is because there might be boards with a hardware + * layout where enabling the pin as output might cause an electrical + * clash. On E/T the pin is always an output, which the board designers + * probably already knew, so even if there are going to be electrical + * issues, there's nothing we can do. + */ + avb->cas_master = false; + + return 0; +} + #define SJA1105_RATE_MBPS(speed) (((speed) * 64000) / 1000) static void sja1105_setup_policer(struct sja1105_l2_policing_entry *policing, @@ -538,6 +606,9 @@ static int sja1105_static_config_load(struct sja1105_private *priv, rc = sja1105_init_general_params(priv); if (rc < 0) return rc; + rc = sja1105_init_avb_params(priv); + if (rc < 0) + return rc; /* Send initial configuration to hardware via SPI */ return sja1105_static_config_upload(priv); @@ -647,6 +718,85 @@ static int sja1105_parse_dt(struct sja1105_private *priv, return rc; } +static int sja1105_sgmii_read(struct sja1105_private *priv, int pcs_reg) +{ + const struct sja1105_regs *regs = priv->info->regs; + u32 val; + int rc; + + rc = sja1105_xfer_u32(priv, SPI_READ, regs->sgmii + pcs_reg, &val, + NULL); + if (rc < 0) + return rc; + + return val; +} + +static int sja1105_sgmii_write(struct sja1105_private *priv, int pcs_reg, + u16 pcs_val) +{ + const struct sja1105_regs *regs = priv->info->regs; + u32 val = pcs_val; + int rc; + + rc = sja1105_xfer_u32(priv, SPI_WRITE, regs->sgmii + pcs_reg, &val, + NULL); + if (rc < 0) + return rc; + + return val; +} + +static void sja1105_sgmii_pcs_config(struct sja1105_private *priv, + bool an_enabled, bool an_master) +{ + u16 ac = SJA1105_AC_AUTONEG_MODE_SGMII; + + /* DIGITAL_CONTROL_1: Enable vendor-specific MMD1, allow the PHY to + * stop the clock during LPI mode, make the MAC reconfigure + * autonomously after PCS autoneg is done, flush the internal FIFOs. + */ + sja1105_sgmii_write(priv, SJA1105_DC1, SJA1105_DC1_EN_VSMMD1 | + SJA1105_DC1_CLOCK_STOP_EN | + SJA1105_DC1_MAC_AUTO_SW | + SJA1105_DC1_INIT); + /* DIGITAL_CONTROL_2: No polarity inversion for TX and RX lanes */ + sja1105_sgmii_write(priv, SJA1105_DC2, SJA1105_DC2_TX_POL_INV_DISABLE); + /* AUTONEG_CONTROL: Use SGMII autoneg */ + if (an_master) + ac |= SJA1105_AC_PHY_MODE | SJA1105_AC_SGMII_LINK; + sja1105_sgmii_write(priv, SJA1105_AC, ac); + /* BASIC_CONTROL: enable in-band AN now, if requested. Otherwise, + * sja1105_sgmii_pcs_force_speed must be called later for the link + * to become operational. + */ + if (an_enabled) + sja1105_sgmii_write(priv, MII_BMCR, + BMCR_ANENABLE | BMCR_ANRESTART); +} + +static void sja1105_sgmii_pcs_force_speed(struct sja1105_private *priv, + int speed) +{ + int pcs_speed; + + switch (speed) { + case SPEED_1000: + pcs_speed = BMCR_SPEED1000; + break; + case SPEED_100: + pcs_speed = BMCR_SPEED100; + break; + case SPEED_10: + pcs_speed = BMCR_SPEED10; + break; + default: + dev_err(priv->ds->dev, "Invalid speed %d\n", speed); + return; + } + sja1105_sgmii_write(priv, MII_BMCR, pcs_speed | BMCR_FULLDPLX); +} + /* Convert link speed from SJA1105 to ethtool encoding */ static int sja1105_speed[] = { [SJA1105_SPEED_AUTO] = SPEED_UNKNOWN, @@ -704,8 +854,13 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port, * table, since this will be used for the clocking setup, and we no * longer need to store it in the static config (already told hardware * we want auto during upload phase). + * Actually for the SGMII port, the MAC is fixed at 1 Gbps and + * we need to configure the PCS only (if even that). */ - mac[port].speed = speed; + if (sja1105_supports_sgmii(priv, port)) + mac[port].speed = SJA1105_SPEED_1000MBPS; + else + mac[port].speed = speed; /* Write to the dynamic reconfiguration tables */ rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, @@ -754,26 +909,34 @@ static bool sja1105_phy_mode_mismatch(struct sja1105_private *priv, int port, case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: return (phy_mode != XMII_MODE_RGMII); + case PHY_INTERFACE_MODE_SGMII: + return (phy_mode != XMII_MODE_SGMII); default: return true; } } static void sja1105_mac_config(struct dsa_switch *ds, int port, - unsigned int link_an_mode, + unsigned int mode, const struct phylink_link_state *state) { struct sja1105_private *priv = ds->priv; + bool is_sgmii = sja1105_supports_sgmii(priv, port); - if (sja1105_phy_mode_mismatch(priv, port, state->interface)) + if (sja1105_phy_mode_mismatch(priv, port, state->interface)) { + dev_err(ds->dev, "Changing PHY mode to %s not supported!\n", + phy_modes(state->interface)); return; + } - if (link_an_mode == MLO_AN_INBAND) { + if (phylink_autoneg_inband(mode) && !is_sgmii) { dev_err(ds->dev, "In-band AN not supported!\n"); return; } - sja1105_adjust_port_config(priv, port, state->speed); + if (is_sgmii) + sja1105_sgmii_pcs_config(priv, phylink_autoneg_inband(mode), + false); } static void sja1105_mac_link_down(struct dsa_switch *ds, int port, @@ -790,7 +953,14 @@ static void sja1105_mac_link_up(struct dsa_switch *ds, int port, int speed, int duplex, bool tx_pause, bool rx_pause) { - sja1105_inhibit_tx(ds->priv, BIT(port), false); + struct sja1105_private *priv = ds->priv; + + sja1105_adjust_port_config(priv, port, speed); + + if (sja1105_supports_sgmii(priv, port) && !phylink_autoneg_inband(mode)) + sja1105_sgmii_pcs_force_speed(priv, speed); + + sja1105_inhibit_tx(priv, BIT(port), false); } static void sja1105_phylink_validate(struct dsa_switch *ds, int port, @@ -825,7 +995,8 @@ static void sja1105_phylink_validate(struct dsa_switch *ds, int port, phylink_set(mask, 10baseT_Full); phylink_set(mask, 100baseT_Full); phylink_set(mask, 100baseT1_Full); - if (mii->xmii_mode[port] == XMII_MODE_RGMII) + if (mii->xmii_mode[port] == XMII_MODE_RGMII || + mii->xmii_mode[port] == XMII_MODE_SGMII) phylink_set(mask, 1000baseT_Full); bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -833,6 +1004,38 @@ static void sja1105_phylink_validate(struct dsa_switch *ds, int port, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static int sja1105_mac_pcs_get_state(struct dsa_switch *ds, int port, + struct phylink_link_state *state) +{ + struct sja1105_private *priv = ds->priv; + int ais; + + /* Read the vendor-specific AUTONEG_INTR_STATUS register */ + ais = sja1105_sgmii_read(priv, SJA1105_AIS); + if (ais < 0) + return ais; + + switch (SJA1105_AIS_SPEED(ais)) { + case 0: + state->speed = SPEED_10; + break; + case 1: + state->speed = SPEED_100; + break; + case 2: + state->speed = SPEED_1000; + break; + default: + dev_err(ds->dev, "Invalid SGMII PCS speed %lu\n", + SJA1105_AIS_SPEED(ais)); + } + state->duplex = SJA1105_AIS_DUPLEX_MODE(ais); + state->an_complete = SJA1105_AIS_COMPLETE(ais); + state->link = SJA1105_AIS_LINK_STATUS(ais); + + return 0; +} + static int sja1105_find_static_fdb_entry(struct sja1105_private *priv, int port, const struct sja1105_l2_lookup_entry *requested) @@ -1359,6 +1562,7 @@ int sja1105_static_config_reload(struct sja1105_private *priv, struct dsa_switch *ds = priv->ds; s64 t1, t2, t3, t4; s64 t12, t34; + u16 bmcr = 0; int rc, i; s64 now; @@ -1376,6 +1580,9 @@ int sja1105_static_config_reload(struct sja1105_private *priv, mac[i].speed = SJA1105_SPEED_AUTO; } + if (sja1105_supports_sgmii(priv, SJA1105_SGMII_PORT)) + bmcr = sja1105_sgmii_read(priv, MII_BMCR); + /* No PTP operations can run right now */ mutex_lock(&priv->ptp_data.lock); @@ -1425,6 +1632,25 @@ out_unlock_ptp: if (rc < 0) goto out; } + + if (sja1105_supports_sgmii(priv, SJA1105_SGMII_PORT)) { + bool an_enabled = !!(bmcr & BMCR_ANENABLE); + + sja1105_sgmii_pcs_config(priv, an_enabled, false); + + if (!an_enabled) { + int speed = SPEED_UNKNOWN; + + if (bmcr & BMCR_SPEED1000) + speed = SPEED_1000; + else if (bmcr & BMCR_SPEED100) + speed = SPEED_100; + else if (bmcr & BMCR_SPEED10) + speed = SPEED_10; + + sja1105_sgmii_pcs_force_speed(priv, speed); + } + } out: mutex_unlock(&priv->mgmt_lock); @@ -1744,7 +1970,8 @@ static void sja1105_teardown(struct dsa_switch *ds) if (!dsa_is_user_port(ds, port)) continue; - kthread_destroy_worker(sp->xmit_worker); + if (sp->xmit_worker) + kthread_destroy_worker(sp->xmit_worker); } sja1105_tas_teardown(ds); @@ -1989,6 +2216,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .teardown = sja1105_teardown, .set_ageing_time = sja1105_set_ageing_time, .phylink_validate = sja1105_phylink_validate, + .phylink_mac_link_state = sja1105_mac_pcs_get_state, .phylink_mac_config = sja1105_mac_config, .phylink_mac_link_up = sja1105_mac_link_up, .phylink_mac_link_down = sja1105_mac_link_down, diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index a836fc38c4a4..a22f8e3fc06b 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -14,6 +14,17 @@ #define SJA1105_MAX_ADJ_PPB 32000000 #define SJA1105_SIZE_PTP_CMD 4 +/* PTPSYNCTS has no interrupt or update mechanism, because the intended + * hardware use case is for the timestamp to be collected synchronously, + * immediately after the CAS_MASTER SJA1105 switch has triggered a CASSYNC + * pulse on the PTP_CLK pin. When used as a generic extts source, it needs + * polling and a comparison with the old value. The polling interval is just + * the Nyquist rate of a canonical PPS input (e.g. from a GPS module). + * Anything of higher frequency than 1 Hz will be lost, since there is no + * timestamp FIFO. + */ +#define SJA1105_EXTTS_INTERVAL (HZ / 2) + /* This range is actually +/- SJA1105_MAX_ADJ_PPB * divided by 1000 (ppb -> ppm) and with a 16-bit * "fractional" part (actually fixed point). @@ -39,44 +50,13 @@ enum sja1105_ptp_clk_mode { PTP_SET_MODE = 0, }; +#define extts_to_data(d) \ + container_of((d), struct sja1105_ptp_data, extts_work) #define ptp_caps_to_data(d) \ container_of((d), struct sja1105_ptp_data, caps) #define ptp_data_to_sja1105(d) \ container_of((d), struct sja1105_private, ptp_data) -static int sja1105_init_avb_params(struct sja1105_private *priv, - bool on) -{ - struct sja1105_avb_params_entry *avb; - struct sja1105_table *table; - - table = &priv->static_config.tables[BLK_IDX_AVB_PARAMS]; - - /* Discard previous AVB Parameters Table */ - if (table->entry_count) { - kfree(table->entries); - table->entry_count = 0; - } - - /* Configure the reception of meta frames only if requested */ - if (!on) - return 0; - - table->entries = kcalloc(SJA1105_MAX_AVB_PARAMS_COUNT, - table->ops->unpacked_entry_size, GFP_KERNEL); - if (!table->entries) - return -ENOMEM; - - table->entry_count = SJA1105_MAX_AVB_PARAMS_COUNT; - - avb = table->entries; - - avb->destmeta = SJA1105_META_DMAC; - avb->srcmeta = SJA1105_META_SMAC; - - return 0; -} - /* Must be called only with priv->tagger_data.state bit * SJA1105_HWTS_RX_EN cleared */ @@ -86,17 +66,12 @@ static int sja1105_change_rxtstamping(struct sja1105_private *priv, struct sja1105_ptp_data *ptp_data = &priv->ptp_data; struct sja1105_general_params_entry *general_params; struct sja1105_table *table; - int rc; table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS]; general_params = table->entries; general_params->send_meta1 = on; general_params->send_meta0 = on; - rc = sja1105_init_avb_params(priv, on); - if (rc < 0) - return rc; - /* Initialize the meta state machine to a known state */ if (priv->tagger_data.stampable_skb) { kfree_skb(priv->tagger_data.stampable_skb); @@ -206,6 +181,8 @@ void sja1105et_ptp_cmd_packing(u8 *buf, struct sja1105_ptp_cmd *cmd, sja1105_packing(buf, &valid, 31, 31, size, op); sja1105_packing(buf, &cmd->ptpstrtsch, 30, 30, size, op); sja1105_packing(buf, &cmd->ptpstopsch, 29, 29, size, op); + sja1105_packing(buf, &cmd->startptpcp, 28, 28, size, op); + sja1105_packing(buf, &cmd->stopptpcp, 27, 27, size, op); sja1105_packing(buf, &cmd->resptp, 2, 2, size, op); sja1105_packing(buf, &cmd->corrclk4ts, 1, 1, size, op); sja1105_packing(buf, &cmd->ptpclkadd, 0, 0, size, op); @@ -221,6 +198,8 @@ void sja1105pqrs_ptp_cmd_packing(u8 *buf, struct sja1105_ptp_cmd *cmd, sja1105_packing(buf, &valid, 31, 31, size, op); sja1105_packing(buf, &cmd->ptpstrtsch, 30, 30, size, op); sja1105_packing(buf, &cmd->ptpstopsch, 29, 29, size, op); + sja1105_packing(buf, &cmd->startptpcp, 28, 28, size, op); + sja1105_packing(buf, &cmd->stopptpcp, 27, 27, size, op); sja1105_packing(buf, &cmd->resptp, 3, 3, size, op); sja1105_packing(buf, &cmd->corrclk4ts, 2, 2, size, op); sja1105_packing(buf, &cmd->ptpclkadd, 0, 0, size, op); @@ -615,6 +594,227 @@ static int sja1105_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) return rc; } +static void sja1105_ptp_extts_work(struct work_struct *work) +{ + struct delayed_work *dw = to_delayed_work(work); + struct sja1105_ptp_data *ptp_data = extts_to_data(dw); + struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data); + const struct sja1105_regs *regs = priv->info->regs; + struct ptp_clock_event event; + u64 ptpsyncts = 0; + int rc; + + mutex_lock(&ptp_data->lock); + + rc = sja1105_xfer_u64(priv, SPI_READ, regs->ptpsyncts, &ptpsyncts, + NULL); + if (rc < 0) + dev_err_ratelimited(priv->ds->dev, + "Failed to read PTPSYNCTS: %d\n", rc); + + if (ptpsyncts && ptp_data->ptpsyncts != ptpsyncts) { + event.index = 0; + event.type = PTP_CLOCK_EXTTS; + event.timestamp = ns_to_ktime(sja1105_ticks_to_ns(ptpsyncts)); + ptp_clock_event(ptp_data->clock, &event); + + ptp_data->ptpsyncts = ptpsyncts; + } + + mutex_unlock(&ptp_data->lock); + + schedule_delayed_work(&ptp_data->extts_work, SJA1105_EXTTS_INTERVAL); +} + +static int sja1105_change_ptp_clk_pin_func(struct sja1105_private *priv, + enum ptp_pin_function func) +{ + struct sja1105_avb_params_entry *avb; + enum ptp_pin_function old_func; + + avb = priv->static_config.tables[BLK_IDX_AVB_PARAMS].entries; + + if (priv->info->device_id == SJA1105E_DEVICE_ID || + priv->info->device_id == SJA1105T_DEVICE_ID || + avb->cas_master) + old_func = PTP_PF_PEROUT; + else + old_func = PTP_PF_EXTTS; + + if (func == old_func) + return 0; + + avb->cas_master = (func == PTP_PF_PEROUT); + + return sja1105_dynamic_config_write(priv, BLK_IDX_AVB_PARAMS, 0, avb, + true); +} + +/* The PTP_CLK pin may be configured to toggle with a 50% duty cycle and a + * frequency f: + * + * NSEC_PER_SEC + * f = ---------------------- + * (PTPPINDUR * 8 ns) * 2 + */ +static int sja1105_per_out_enable(struct sja1105_private *priv, + struct ptp_perout_request *perout, + bool on) +{ + struct sja1105_ptp_data *ptp_data = &priv->ptp_data; + const struct sja1105_regs *regs = priv->info->regs; + struct sja1105_ptp_cmd cmd = ptp_data->cmd; + int rc; + + /* We only support one channel */ + if (perout->index != 0) + return -EOPNOTSUPP; + + /* Reject requests with unsupported flags */ + if (perout->flags) + return -EOPNOTSUPP; + + mutex_lock(&ptp_data->lock); + + rc = sja1105_change_ptp_clk_pin_func(priv, PTP_PF_PEROUT); + if (rc) + goto out; + + if (on) { + struct timespec64 pin_duration_ts = { + .tv_sec = perout->period.sec, + .tv_nsec = perout->period.nsec, + }; + struct timespec64 pin_start_ts = { + .tv_sec = perout->start.sec, + .tv_nsec = perout->start.nsec, + }; + u64 pin_duration = timespec64_to_ns(&pin_duration_ts); + u64 pin_start = timespec64_to_ns(&pin_start_ts); + u32 pin_duration32; + u64 now; + + /* ptppindur: 32 bit register which holds the interval between + * 2 edges on PTP_CLK. So check for truncation which happens + * at periods larger than around 68.7 seconds. + */ + pin_duration = ns_to_sja1105_ticks(pin_duration / 2); + if (pin_duration > U32_MAX) { + rc = -ERANGE; + goto out; + } + pin_duration32 = pin_duration; + + /* ptppins: 64 bit register which needs to hold a PTP time + * larger than the current time, otherwise the startptpcp + * command won't do anything. So advance the current time + * by a number of periods in a way that won't alter the + * phase offset. + */ + rc = __sja1105_ptp_gettimex(priv->ds, &now, NULL); + if (rc < 0) + goto out; + + pin_start = future_base_time(pin_start, pin_duration, + now + 1ull * NSEC_PER_SEC); + pin_start = ns_to_sja1105_ticks(pin_start); + + rc = sja1105_xfer_u64(priv, SPI_WRITE, regs->ptppinst, + &pin_start, NULL); + if (rc < 0) + goto out; + + rc = sja1105_xfer_u32(priv, SPI_WRITE, regs->ptppindur, + &pin_duration32, NULL); + if (rc < 0) + goto out; + } + + if (on) + cmd.startptpcp = true; + else + cmd.stopptpcp = true; + + rc = sja1105_ptp_commit(priv->ds, &cmd, SPI_WRITE); + +out: + mutex_unlock(&ptp_data->lock); + + return rc; +} + +static int sja1105_extts_enable(struct sja1105_private *priv, + struct ptp_extts_request *extts, + bool on) +{ + int rc; + + /* We only support one channel */ + if (extts->index != 0) + return -EOPNOTSUPP; + + /* Reject requests with unsupported flags */ + if (extts->flags) + return -EOPNOTSUPP; + + rc = sja1105_change_ptp_clk_pin_func(priv, PTP_PF_EXTTS); + if (rc) + return rc; + + if (on) + schedule_delayed_work(&priv->ptp_data.extts_work, + SJA1105_EXTTS_INTERVAL); + else + cancel_delayed_work_sync(&priv->ptp_data.extts_work); + + return 0; +} + +static int sja1105_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *req, int on) +{ + struct sja1105_ptp_data *ptp_data = ptp_caps_to_data(ptp); + struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data); + int rc = -EOPNOTSUPP; + + if (req->type == PTP_CLK_REQ_PEROUT) + rc = sja1105_per_out_enable(priv, &req->perout, on); + else if (req->type == PTP_CLK_REQ_EXTTS) + rc = sja1105_extts_enable(priv, &req->extts, on); + + return rc; +} + +static int sja1105_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + struct sja1105_ptp_data *ptp_data = ptp_caps_to_data(ptp); + struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data); + + if (chan != 0 || pin != 0) + return -1; + + switch (func) { + case PTP_PF_NONE: + case PTP_PF_PEROUT: + break; + case PTP_PF_EXTTS: + if (priv->info->device_id == SJA1105E_DEVICE_ID || + priv->info->device_id == SJA1105T_DEVICE_ID) + return -1; + break; + default: + return -1; + } + return 0; +} + +static struct ptp_pin_desc sja1105_ptp_pin = { + .name = "ptp_clk", + .index = 0, + .func = PTP_PF_NONE, +}; + int sja1105_ptp_clock_register(struct dsa_switch *ds) { struct sja1105_private *priv = ds->priv; @@ -628,8 +828,14 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds) .adjtime = sja1105_ptp_adjtime, .gettimex64 = sja1105_ptp_gettimex, .settime64 = sja1105_ptp_settime, + .enable = sja1105_ptp_enable, + .verify = sja1105_ptp_verify_pin, .do_aux_work = sja1105_rxtstamp_work, .max_adj = SJA1105_MAX_ADJ_PPB, + .pin_config = &sja1105_ptp_pin, + .n_pins = 1, + .n_ext_ts = 1, + .n_per_out = 1, }; skb_queue_head_init(&ptp_data->skb_rxtstamp_queue); @@ -642,6 +848,8 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds) ptp_data->cmd.corrclk4ts = true; ptp_data->cmd.ptpclkadd = PTP_SET_MODE; + INIT_DELAYED_WORK(&ptp_data->extts_work, sja1105_ptp_extts_work); + return sja1105_ptp_reset(ds); } @@ -653,6 +861,7 @@ void sja1105_ptp_clock_unregister(struct dsa_switch *ds) if (IS_ERR_OR_NULL(ptp_data->clock)) return; + cancel_delayed_work_sync(&ptp_data->extts_work); ptp_cancel_worker_sync(ptp_data->clock); skb_queue_purge(&ptp_data->skb_rxtstamp_queue); ptp_clock_unregister(ptp_data->clock); diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h index 6f4a19eec709..43480b24f1f0 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.h +++ b/drivers/net/dsa/sja1105/sja1105_ptp.h @@ -21,7 +21,36 @@ static inline s64 sja1105_ticks_to_ns(s64 ticks) return ticks * SJA1105_TICK_NS; } +/* Calculate the first base_time in the future that satisfies this + * relationship: + * + * future_base_time = base_time + N x cycle_time >= now, or + * + * now - base_time + * N >= --------------- + * cycle_time + * + * Because N is an integer, the ceiling value of the above "a / b" ratio + * is in fact precisely the floor value of "(a + b - 1) / b", which is + * easier to calculate only having integer division tools. + */ +static inline s64 future_base_time(s64 base_time, s64 cycle_time, s64 now) +{ + s64 a, b, n; + + if (base_time >= now) + return base_time; + + a = now - base_time; + b = cycle_time; + n = div_s64(a + b - 1, b); + + return base_time + n * cycle_time; +} + struct sja1105_ptp_cmd { + u64 startptpcp; /* start toggling PTP_CLK pin */ + u64 stopptpcp; /* stop toggling PTP_CLK pin */ u64 ptpstrtsch; /* start schedule */ u64 ptpstopsch; /* stop schedule */ u64 resptp; /* reset */ @@ -30,12 +59,14 @@ struct sja1105_ptp_cmd { }; struct sja1105_ptp_data { + struct delayed_work extts_work; struct sk_buff_head skb_rxtstamp_queue; struct ptp_clock_info caps; struct ptp_clock *clock; struct sja1105_ptp_cmd cmd; /* Serializes all operations on the PTP hardware clock */ struct mutex lock; + u64 ptpsyncts; }; int sja1105_ptp_clock_register(struct dsa_switch *ds); diff --git a/drivers/net/dsa/sja1105/sja1105_sgmii.h b/drivers/net/dsa/sja1105/sja1105_sgmii.h new file mode 100644 index 000000000000..24d9bc046e70 --- /dev/null +++ b/drivers/net/dsa/sja1105/sja1105_sgmii.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2020, NXP Semiconductors + */ +#ifndef _SJA1105_SGMII_H +#define _SJA1105_SGMII_H + +#define SJA1105_SGMII_PORT 4 + +/* DIGITAL_CONTROL_1 (address 1f8000h) */ +#define SJA1105_DC1 0x8000 +#define SJA1105_DC1_VS_RESET BIT(15) +#define SJA1105_DC1_REMOTE_LOOPBACK BIT(14) +#define SJA1105_DC1_EN_VSMMD1 BIT(13) +#define SJA1105_DC1_POWER_SAVE BIT(11) +#define SJA1105_DC1_CLOCK_STOP_EN BIT(10) +#define SJA1105_DC1_MAC_AUTO_SW BIT(9) +#define SJA1105_DC1_INIT BIT(8) +#define SJA1105_DC1_TX_DISABLE BIT(4) +#define SJA1105_DC1_AUTONEG_TIMER_OVRR BIT(3) +#define SJA1105_DC1_BYP_POWERUP BIT(1) +#define SJA1105_DC1_PHY_MODE_CONTROL BIT(0) + +/* DIGITAL_CONTROL_2 register (address 1f80E1h) */ +#define SJA1105_DC2 0x80e1 +#define SJA1105_DC2_TX_POL_INV_DISABLE BIT(4) +#define SJA1105_DC2_RX_POL_INV BIT(0) + +/* DIGITAL_ERROR_CNT register (address 1f80E2h) */ +#define SJA1105_DEC 0x80e2 +#define SJA1105_DEC_ICG_EC_ENA BIT(4) +#define SJA1105_DEC_CLEAR_ON_READ BIT(0) + +/* AUTONEG_CONTROL register (address 1f8001h) */ +#define SJA1105_AC 0x8001 +#define SJA1105_AC_MII_CONTROL BIT(8) +#define SJA1105_AC_SGMII_LINK BIT(4) +#define SJA1105_AC_PHY_MODE BIT(3) +#define SJA1105_AC_AUTONEG_MODE(x) (((x) << 1) & GENMASK(2, 1)) +#define SJA1105_AC_AUTONEG_MODE_SGMII SJA1105_AC_AUTONEG_MODE(2) + +/* AUTONEG_INTR_STATUS register (address 1f8002h) */ +#define SJA1105_AIS 0x8002 +#define SJA1105_AIS_LINK_STATUS(x) (!!((x) & BIT(4))) +#define SJA1105_AIS_SPEED(x) (((x) & GENMASK(3, 2)) >> 2) +#define SJA1105_AIS_DUPLEX_MODE(x) (!!((x) & BIT(1))) +#define SJA1105_AIS_COMPLETE(x) (!!((x) & BIT(0))) + +/* DEBUG_CONTROL register (address 1f8005h) */ +#define SJA1105_DC 0x8005 +#define SJA1105_DC_SUPPRESS_LOS BIT(4) +#define SJA1105_DC_RESTART_SYNC BIT(0) + +#endif diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index 29b127f3bf9c..fef2c50cd3f6 100644 --- a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -458,6 +458,8 @@ static struct sja1105_regs sja1105et_regs = { .rmii_ext_tx_clk = {0x100018, 0x10001F, 0x100026, 0x10002D, 0x100034}, .ptpegr_ts = {0xC0, 0xC2, 0xC4, 0xC6, 0xC8}, .ptpschtm = 0x12, /* Spans 0x12 to 0x13 */ + .ptppinst = 0x14, + .ptppindur = 0x16, .ptp_control = 0x17, .ptpclkval = 0x18, /* Spans 0x18 to 0x19 */ .ptpclkrate = 0x1A, @@ -474,6 +476,7 @@ static struct sja1105_regs sja1105pqrs_regs = { /* UM10944.pdf, Table 86, ACU Register overview */ .pad_mii_tx = {0x100800, 0x100802, 0x100804, 0x100806, 0x100808}, .pad_mii_id = {0x100810, 0x100811, 0x100812, 0x100813, 0x100814}, + .sgmii = 0x1F0000, .rmii_pll1 = 0x10000A, .cgu_idiv = {0x10000B, 0x10000C, 0x10000D, 0x10000E, 0x10000F}, .mac = {0x200, 0x202, 0x204, 0x206, 0x208}, @@ -490,10 +493,13 @@ static struct sja1105_regs sja1105pqrs_regs = { .qlevel = {0x604, 0x614, 0x624, 0x634, 0x644}, .ptpegr_ts = {0xC0, 0xC4, 0xC8, 0xCC, 0xD0}, .ptpschtm = 0x13, /* Spans 0x13 to 0x14 */ + .ptppinst = 0x15, + .ptppindur = 0x17, .ptp_control = 0x18, .ptpclkval = 0x19, .ptpclkrate = 0x1B, .ptpclkcorp = 0x1E, + .ptpsyncts = 0x1F, }; struct sja1105_info sja1105e_info = { diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c index 63d2311817c4..bbfe034910a0 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.c +++ b/drivers/net/dsa/sja1105/sja1105_static_config.c @@ -102,12 +102,13 @@ static size_t sja1105et_avb_params_entry_packing(void *buf, void *entry_ptr, return size; } -static size_t sja1105pqrs_avb_params_entry_packing(void *buf, void *entry_ptr, - enum packing_op op) +size_t sja1105pqrs_avb_params_entry_packing(void *buf, void *entry_ptr, + enum packing_op op) { const size_t size = SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY; struct sja1105_avb_params_entry *entry = entry_ptr; + sja1105_packing(buf, &entry->cas_master, 126, 126, size, op); sja1105_packing(buf, &entry->destmeta, 125, 78, size, op); sja1105_packing(buf, &entry->srcmeta, 77, 30, size, op); return size; diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h index f4a5c5c04311..8afafb6aef12 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.h +++ b/drivers/net/dsa/sja1105/sja1105_static_config.h @@ -230,6 +230,7 @@ struct sja1105_l2_policing_entry { }; struct sja1105_avb_params_entry { + u64 cas_master; u64 destmeta; u64 srcmeta; }; diff --git a/drivers/net/dsa/sja1105/sja1105_tas.c b/drivers/net/dsa/sja1105/sja1105_tas.c index fa6750d973d7..77e547b4cd89 100644 --- a/drivers/net/dsa/sja1105/sja1105_tas.c +++ b/drivers/net/dsa/sja1105/sja1105_tas.c @@ -28,33 +28,6 @@ static s64 sja1105_delta_to_ns(s64 delta) return delta * 200; } -/* Calculate the first base_time in the future that satisfies this - * relationship: - * - * future_base_time = base_time + N x cycle_time >= now, or - * - * now - base_time - * N >= --------------- - * cycle_time - * - * Because N is an integer, the ceiling value of the above "a / b" ratio - * is in fact precisely the floor value of "(a + b - 1) / b", which is - * easier to calculate only having integer division tools. - */ -static s64 future_base_time(s64 base_time, s64 cycle_time, s64 now) -{ - s64 a, b, n; - - if (base_time >= now) - return base_time; - - a = now - base_time; - b = cycle_time; - n = div_s64(a + b - 1, b); - - return base_time + n * cycle_time; -} - static int sja1105_tas_set_runtime_params(struct sja1105_private *priv) { struct sja1105_tas_data *tas_data = &priv->tas_data; diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 4383ee615793..5ed33c2c4742 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -310,7 +310,7 @@ enum state_values { * cannot pass a read, so this forces current writes to post. */ #define typhoon_post_pci_writes(x) \ - do { if(likely(use_mmio)) ioread32(x+TYPHOON_REG_HEARTBEAT); } while(0) + do { if (likely(use_mmio)) ioread32(x+TYPHOON_REG_HEARTBEAT); } while (0) /* We'll wait up to six seconds for a reset, and half a second normally. */ @@ -380,7 +380,7 @@ typhoon_reset(void __iomem *ioaddr, int wait_type) int i, err = 0; int timeout; - if(wait_type == WaitNoSleep) + if (wait_type == WaitNoSleep) timeout = TYPHOON_RESET_TIMEOUT_NOSLEEP; else timeout = TYPHOON_RESET_TIMEOUT_SLEEP; @@ -393,13 +393,13 @@ typhoon_reset(void __iomem *ioaddr, int wait_type) udelay(1); iowrite32(TYPHOON_RESET_NONE, ioaddr + TYPHOON_REG_SOFT_RESET); - if(wait_type != NoWait) { - for(i = 0; i < timeout; i++) { - if(ioread32(ioaddr + TYPHOON_REG_STATUS) == + if (wait_type != NoWait) { + for (i = 0; i < timeout; i++) { + if (ioread32(ioaddr + TYPHOON_REG_STATUS) == TYPHOON_STATUS_WAITING_FOR_HOST) goto out; - if(wait_type == WaitSleep) + if (wait_type == WaitSleep) schedule_timeout_uninterruptible(1); else udelay(TYPHOON_UDELAY); @@ -422,7 +422,7 @@ out: * which should be enough (I've see it work well at 100us, but still * saw occasional problems.) */ - if(wait_type == WaitSleep) + if (wait_type == WaitSleep) msleep(5); else udelay(500); @@ -434,8 +434,8 @@ typhoon_wait_status(void __iomem *ioaddr, u32 wait_value) { int i, err = 0; - for(i = 0; i < TYPHOON_WAIT_TIMEOUT; i++) { - if(ioread32(ioaddr + TYPHOON_REG_STATUS) == wait_value) + for (i = 0; i < TYPHOON_WAIT_TIMEOUT; i++) { + if (ioread32(ioaddr + TYPHOON_REG_STATUS) == wait_value) goto out; udelay(TYPHOON_UDELAY); } @@ -449,7 +449,7 @@ out: static inline void typhoon_media_status(struct net_device *dev, struct resp_desc *resp) { - if(resp->parm1 & TYPHOON_MEDIA_STAT_NO_LINK) + if (resp->parm1 & TYPHOON_MEDIA_STAT_NO_LINK) netif_carrier_off(dev); else netif_carrier_on(dev); @@ -465,7 +465,7 @@ typhoon_hello(struct typhoon *tp) * card in a long while. If the lock is held, then we're in the * process of issuing a command, so we don't need to respond. */ - if(spin_trylock(&tp->command_lock)) { + if (spin_trylock(&tp->command_lock)) { cmd = (struct cmd_desc *)(ring->ringBase + ring->lastWrite); typhoon_inc_cmd_index(&ring->lastWrite, 1); @@ -489,32 +489,32 @@ typhoon_process_response(struct typhoon *tp, int resp_size, cleared = le32_to_cpu(indexes->respCleared); ready = le32_to_cpu(indexes->respReady); - while(cleared != ready) { + while (cleared != ready) { resp = (struct resp_desc *)(base + cleared); count = resp->numDesc + 1; - if(resp_save && resp->seqNo) { - if(count > resp_size) { + if (resp_save && resp->seqNo) { + if (count > resp_size) { resp_save->flags = TYPHOON_RESP_ERROR; goto cleanup; } wrap_len = 0; len = count * sizeof(*resp); - if(unlikely(cleared + len > RESPONSE_RING_SIZE)) { + if (unlikely(cleared + len > RESPONSE_RING_SIZE)) { wrap_len = cleared + len - RESPONSE_RING_SIZE; len = RESPONSE_RING_SIZE - cleared; } memcpy(resp_save, resp, len); - if(unlikely(wrap_len)) { + if (unlikely(wrap_len)) { resp_save += len / sizeof(*resp); memcpy(resp_save, base, wrap_len); } resp_save = NULL; - } else if(resp->cmd == TYPHOON_CMD_READ_MEDIA_STATUS) { + } else if (resp->cmd == TYPHOON_CMD_READ_MEDIA_STATUS) { typhoon_media_status(tp->dev, resp); - } else if(resp->cmd == TYPHOON_CMD_HELLO_RESP) { + } else if (resp->cmd == TYPHOON_CMD_HELLO_RESP) { typhoon_hello(tp); } else { netdev_err(tp->dev, @@ -588,19 +588,19 @@ typhoon_issue_command(struct typhoon *tp, int num_cmd, struct cmd_desc *cmd, freeCmd = typhoon_num_free_cmd(tp); freeResp = typhoon_num_free_resp(tp); - if(freeCmd < num_cmd || freeResp < num_resp) { + if (freeCmd < num_cmd || freeResp < num_resp) { netdev_err(tp->dev, "no descs for cmd, had (needed) %d (%d) cmd, %d (%d) resp\n", freeCmd, num_cmd, freeResp, num_resp); err = -ENOMEM; goto out; } - if(cmd->flags & TYPHOON_CMD_RESPOND) { + if (cmd->flags & TYPHOON_CMD_RESPOND) { /* If we're expecting a response, but the caller hasn't given * us a place to put it, we'll provide one. */ tp->awaiting_resp = 1; - if(resp == NULL) { + if (resp == NULL) { resp = &local_resp; num_resp = 1; } @@ -608,13 +608,13 @@ typhoon_issue_command(struct typhoon *tp, int num_cmd, struct cmd_desc *cmd, wrap_len = 0; len = num_cmd * sizeof(*cmd); - if(unlikely(ring->lastWrite + len > COMMAND_RING_SIZE)) { + if (unlikely(ring->lastWrite + len > COMMAND_RING_SIZE)) { wrap_len = ring->lastWrite + len - COMMAND_RING_SIZE; len = COMMAND_RING_SIZE - ring->lastWrite; } memcpy(ring->ringBase + ring->lastWrite, cmd, len); - if(unlikely(wrap_len)) { + if (unlikely(wrap_len)) { struct cmd_desc *wrap_ptr = cmd; wrap_ptr += len / sizeof(*cmd); memcpy(ring->ringBase, wrap_ptr, wrap_len); @@ -628,7 +628,7 @@ typhoon_issue_command(struct typhoon *tp, int num_cmd, struct cmd_desc *cmd, iowrite32(ring->lastWrite, tp->ioaddr + TYPHOON_REG_CMD_READY); typhoon_post_pci_writes(tp->ioaddr); - if((cmd->flags & TYPHOON_CMD_RESPOND) == 0) + if ((cmd->flags & TYPHOON_CMD_RESPOND) == 0) goto out; /* Ugh. We'll be here about 8ms, spinning our thumbs, unable to @@ -648,14 +648,14 @@ typhoon_issue_command(struct typhoon *tp, int num_cmd, struct cmd_desc *cmd, * wait here. */ got_resp = 0; - for(i = 0; i < TYPHOON_WAIT_TIMEOUT && !got_resp; i++) { - if(indexes->respCleared != indexes->respReady) + for (i = 0; i < TYPHOON_WAIT_TIMEOUT && !got_resp; i++) { + if (indexes->respCleared != indexes->respReady) got_resp = typhoon_process_response(tp, num_resp, resp); udelay(TYPHOON_UDELAY); } - if(!got_resp) { + if (!got_resp) { err = -ETIMEDOUT; goto out; } @@ -663,11 +663,11 @@ typhoon_issue_command(struct typhoon *tp, int num_cmd, struct cmd_desc *cmd, /* Collect the error response even if we don't care about the * rest of the response */ - if(resp->flags & TYPHOON_RESP_ERROR) + if (resp->flags & TYPHOON_RESP_ERROR) err = -EIO; out: - if(tp->awaiting_resp) { + if (tp->awaiting_resp) { tp->awaiting_resp = 0; smp_wmb(); @@ -678,7 +678,7 @@ out: * time. So, check for it, and interrupt ourselves if this * is the case. */ - if(indexes->respCleared != indexes->respReady) + if (indexes->respCleared != indexes->respReady) iowrite32(1, tp->ioaddr + TYPHOON_REG_SELF_INTERRUPT); } @@ -748,7 +748,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) * between marking the queue awake and updating the cleared index. * Just loop and it will appear. This comes from the acenic driver. */ - while(unlikely(typhoon_num_free_tx(txRing) < (numDesc + 2))) + while (unlikely(typhoon_num_free_tx(txRing) < (numDesc + 2))) smp_rmb(); first_txd = (struct tx_desc *) (txRing->ringBase + txRing->lastWrite); @@ -760,7 +760,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) first_txd->tx_addr = (u64)((unsigned long) skb); first_txd->processFlags = 0; - if(skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { /* The 3XP will figure out if this is UDP/TCP */ first_txd->processFlags |= TYPHOON_TX_PF_TCP_CHKSUM; first_txd->processFlags |= TYPHOON_TX_PF_UDP_CHKSUM; @@ -788,7 +788,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) /* No need to worry about padding packet -- the firmware pads * it with zeros to ETH_ZLEN for us. */ - if(skb_shinfo(skb)->nr_frags == 0) { + if (skb_shinfo(skb)->nr_frags == 0) { skb_dma = pci_map_single(tp->tx_pdev, skb->data, skb->len, PCI_DMA_TODEVICE); txd->flags = TYPHOON_FRAG_DESC | TYPHOON_DESC_VALID; @@ -840,14 +840,14 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) */ numDesc = MAX_SKB_FRAGS + TSO_NUM_DESCRIPTORS + 1; - if(typhoon_num_free_tx(txRing) < (numDesc + 2)) { + if (typhoon_num_free_tx(txRing) < (numDesc + 2)) { netif_stop_queue(dev); /* A Tx complete IRQ could have gotten between, making * the ring free again. Only need to recheck here, since * Tx is serialized. */ - if(typhoon_num_free_tx(txRing) >= (numDesc + 2)) + if (typhoon_num_free_tx(txRing) >= (numDesc + 2)) netif_wake_queue(dev); } @@ -863,7 +863,7 @@ typhoon_set_rx_mode(struct net_device *dev) __le16 filter; filter = TYPHOON_RX_FILTER_DIRECTED | TYPHOON_RX_FILTER_BROADCAST; - if(dev->flags & IFF_PROMISC) { + if (dev->flags & IFF_PROMISC) { filter |= TYPHOON_RX_FILTER_PROMISCOUS; } else if ((netdev_mc_count(dev) > multicast_filter_limit) || (dev->flags & IFF_ALLMULTI)) { @@ -905,7 +905,7 @@ typhoon_do_get_stats(struct typhoon *tp) INIT_COMMAND_WITH_RESPONSE(&xp_cmd, TYPHOON_CMD_READ_STATS); err = typhoon_issue_command(tp, 1, &xp_cmd, 7, xp_resp); - if(err < 0) + if (err < 0) return err; /* 3Com's Linux driver uses txMultipleCollisions as it's @@ -953,10 +953,10 @@ typhoon_get_stats(struct net_device *dev) struct net_device_stats *saved = &tp->stats_saved; smp_rmb(); - if(tp->card_state == Sleeping) + if (tp->card_state == Sleeping) return saved; - if(typhoon_do_get_stats(tp) < 0) { + if (typhoon_do_get_stats(tp) < 0) { netdev_err(dev, "error getting stats\n"); return saved; } @@ -973,12 +973,12 @@ typhoon_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) struct resp_desc xp_resp[3]; smp_rmb(); - if(tp->card_state == Sleeping) { + if (tp->card_state == Sleeping) { strlcpy(info->fw_version, "Sleep image", sizeof(info->fw_version)); } else { INIT_COMMAND_WITH_RESPONSE(&xp_cmd, TYPHOON_CMD_READ_VERSIONS); - if(typhoon_issue_command(tp, 1, &xp_cmd, 3, xp_resp) < 0) { + if (typhoon_issue_command(tp, 1, &xp_cmd, 3, xp_resp) < 0) { strlcpy(info->fw_version, "Unknown runtime", sizeof(info->fw_version)); } else { @@ -1025,7 +1025,7 @@ typhoon_get_link_ksettings(struct net_device *dev, break; } - if(tp->capabilities & TYPHOON_FIBER) { + if (tp->capabilities & TYPHOON_FIBER) { supported |= SUPPORTED_FIBRE; advertising |= ADVERTISED_FIBRE; cmd->base.port = PORT_FIBRE; @@ -1042,7 +1042,7 @@ typhoon_get_link_ksettings(struct net_device *dev, cmd->base.speed = tp->speed; cmd->base.duplex = tp->duplex; cmd->base.phy_address = 0; - if(tp->xcvr_select == TYPHOON_XCVR_AUTONEG) + if (tp->xcvr_select == TYPHOON_XCVR_AUTONEG) cmd->base.autoneg = AUTONEG_ENABLE; else cmd->base.autoneg = AUTONEG_DISABLE; @@ -1090,7 +1090,7 @@ typhoon_set_link_ksettings(struct net_device *dev, INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_XCVR_SELECT); xp_cmd.parm1 = xcvr; err = typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(err < 0) + if (err < 0) goto out; tp->xcvr_select = xcvr; @@ -1113,9 +1113,9 @@ typhoon_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) wol->supported = WAKE_PHY | WAKE_MAGIC; wol->wolopts = 0; - if(tp->wol_events & TYPHOON_WAKE_LINK_EVENT) + if (tp->wol_events & TYPHOON_WAKE_LINK_EVENT) wol->wolopts |= WAKE_PHY; - if(tp->wol_events & TYPHOON_WAKE_MAGIC_PKT) + if (tp->wol_events & TYPHOON_WAKE_MAGIC_PKT) wol->wolopts |= WAKE_MAGIC; memset(&wol->sopass, 0, sizeof(wol->sopass)); } @@ -1125,13 +1125,13 @@ typhoon_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct typhoon *tp = netdev_priv(dev); - if(wol->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + if (wol->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) return -EINVAL; tp->wol_events = 0; - if(wol->wolopts & WAKE_PHY) + if (wol->wolopts & WAKE_PHY) tp->wol_events |= TYPHOON_WAKE_LINK_EVENT; - if(wol->wolopts & WAKE_MAGIC) + if (wol->wolopts & WAKE_MAGIC) tp->wol_events |= TYPHOON_WAKE_MAGIC_PKT; return 0; @@ -1162,8 +1162,8 @@ typhoon_wait_interrupt(void __iomem *ioaddr) { int i, err = 0; - for(i = 0; i < TYPHOON_WAIT_TIMEOUT; i++) { - if(ioread32(ioaddr + TYPHOON_REG_INTR_STATUS) & + for (i = 0; i < TYPHOON_WAIT_TIMEOUT; i++) { + if (ioread32(ioaddr + TYPHOON_REG_INTR_STATUS) & TYPHOON_INTR_BOOTCMD) goto out; udelay(TYPHOON_UDELAY); @@ -1355,7 +1355,7 @@ typhoon_download_firmware(struct typhoon *tp) */ err = -ENOMEM; dpage = pci_alloc_consistent(pdev, PAGE_SIZE, &dpage_dma); - if(!dpage) { + if (!dpage) { netdev_err(tp->dev, "no DMA mem for firmware\n"); goto err_out; } @@ -1368,7 +1368,7 @@ typhoon_download_firmware(struct typhoon *tp) ioaddr + TYPHOON_REG_INTR_MASK); err = -ETIMEDOUT; - if(typhoon_wait_status(ioaddr, TYPHOON_STATUS_WAITING_FOR_HOST) < 0) { + if (typhoon_wait_status(ioaddr, TYPHOON_STATUS_WAITING_FOR_HOST) < 0) { netdev_err(tp->dev, "card ready timeout\n"); goto err_out_irq; } @@ -1397,16 +1397,16 @@ typhoon_download_firmware(struct typhoon *tp) * last write to the command register to post, so * we don't need a typhoon_post_pci_writes() after it. */ - for(i = 0; i < numSections; i++) { + for (i = 0; i < numSections; i++) { sHdr = (struct typhoon_section_header *) image_data; image_data += sizeof(struct typhoon_section_header); load_addr = le32_to_cpu(sHdr->startAddr); section_len = le32_to_cpu(sHdr->len); - while(section_len) { + while (section_len) { len = min_t(u32, section_len, PAGE_SIZE); - if(typhoon_wait_interrupt(ioaddr) < 0 || + if (typhoon_wait_interrupt(ioaddr) < 0 || ioread32(ioaddr + TYPHOON_REG_STATUS) != TYPHOON_STATUS_WAITING_FOR_SEGMENT) { netdev_err(tp->dev, "segment ready timeout\n"); @@ -1439,7 +1439,7 @@ typhoon_download_firmware(struct typhoon *tp) } } - if(typhoon_wait_interrupt(ioaddr) < 0 || + if (typhoon_wait_interrupt(ioaddr) < 0 || ioread32(ioaddr + TYPHOON_REG_STATUS) != TYPHOON_STATUS_WAITING_FOR_SEGMENT) { netdev_err(tp->dev, "final segment ready timeout\n"); @@ -1448,7 +1448,7 @@ typhoon_download_firmware(struct typhoon *tp) iowrite32(TYPHOON_BOOTCMD_DNLD_COMPLETE, ioaddr + TYPHOON_REG_COMMAND); - if(typhoon_wait_status(ioaddr, TYPHOON_STATUS_WAITING_FOR_BOOT) < 0) { + if (typhoon_wait_status(ioaddr, TYPHOON_STATUS_WAITING_FOR_BOOT) < 0) { netdev_err(tp->dev, "boot ready timeout, status 0x%0x\n", ioread32(ioaddr + TYPHOON_REG_STATUS)); goto err_out_irq; @@ -1471,7 +1471,7 @@ typhoon_boot_3XP(struct typhoon *tp, u32 initial_status) { void __iomem *ioaddr = tp->ioaddr; - if(typhoon_wait_status(ioaddr, initial_status) < 0) { + if (typhoon_wait_status(ioaddr, initial_status) < 0) { netdev_err(tp->dev, "boot ready timeout\n"); goto out_timeout; } @@ -1482,7 +1482,7 @@ typhoon_boot_3XP(struct typhoon *tp, u32 initial_status) iowrite32(TYPHOON_BOOTCMD_REG_BOOT_RECORD, ioaddr + TYPHOON_REG_COMMAND); - if(typhoon_wait_status(ioaddr, TYPHOON_STATUS_RUNNING) < 0) { + if (typhoon_wait_status(ioaddr, TYPHOON_STATUS_RUNNING) < 0) { netdev_err(tp->dev, "boot finish timeout (status 0x%x)\n", ioread32(ioaddr + TYPHOON_REG_STATUS)); goto out_timeout; @@ -1512,17 +1512,17 @@ typhoon_clean_tx(struct typhoon *tp, struct transmit_ring *txRing, int dma_len; int type; - while(lastRead != le32_to_cpu(*index)) { + while (lastRead != le32_to_cpu(*index)) { tx = (struct tx_desc *) (txRing->ringBase + lastRead); type = tx->flags & TYPHOON_TYPE_MASK; - if(type == TYPHOON_TX_DESC) { + if (type == TYPHOON_TX_DESC) { /* This tx_desc describes a packet. */ unsigned long ptr = tx->tx_addr; struct sk_buff *skb = (struct sk_buff *) ptr; dev_kfree_skb_irq(skb); - } else if(type == TYPHOON_FRAG_DESC) { + } else if (type == TYPHOON_FRAG_DESC) { /* This tx_desc describes a memory mapping. Free it. */ skb_dma = (dma_addr_t) le32_to_cpu(tx->frag.addr); @@ -1547,7 +1547,7 @@ typhoon_tx_complete(struct typhoon *tp, struct transmit_ring *txRing, /* This will need changing if we start to use the Hi Tx ring. */ lastRead = typhoon_clean_tx(tp, txRing, index); - if(netif_queue_stopped(tp->dev) && typhoon_num_free(txRing->lastWrite, + if (netif_queue_stopped(tp->dev) && typhoon_num_free(txRing->lastWrite, lastRead, TXLO_ENTRIES) > (numDesc + 2)) netif_wake_queue(tp->dev); @@ -1563,7 +1563,7 @@ typhoon_recycle_rx_skb(struct typhoon *tp, u32 idx) struct basic_ring *ring = &tp->rxBuffRing; struct rx_free *r; - if((ring->lastWrite + sizeof(*r)) % (RXFREE_ENTRIES * sizeof(*r)) == + if ((ring->lastWrite + sizeof(*r)) % (RXFREE_ENTRIES * sizeof(*r)) == le32_to_cpu(indexes->rxBuffCleared)) { /* no room in ring, just drop the skb */ @@ -1594,12 +1594,12 @@ typhoon_alloc_rx_skb(struct typhoon *tp, u32 idx) rxb->skb = NULL; - if((ring->lastWrite + sizeof(*r)) % (RXFREE_ENTRIES * sizeof(*r)) == + if ((ring->lastWrite + sizeof(*r)) % (RXFREE_ENTRIES * sizeof(*r)) == le32_to_cpu(indexes->rxBuffCleared)) return -ENOMEM; skb = netdev_alloc_skb(tp->dev, PKT_BUF_SZ); - if(!skb) + if (!skb) return -ENOMEM; #if 0 @@ -1646,7 +1646,7 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile __le32 * read received = 0; local_ready = le32_to_cpu(*ready); rxaddr = le32_to_cpu(*cleared); - while(rxaddr != local_ready && budget > 0) { + while (rxaddr != local_ready && budget > 0) { rx = (struct rx_desc *) (rxRing->ringBase + rxaddr); idx = rx->addr; rxb = &tp->rxbuffers[idx]; @@ -1655,14 +1655,14 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile __le32 * read typhoon_inc_rx_index(&rxaddr, 1); - if(rx->flags & TYPHOON_RX_ERROR) { + if (rx->flags & TYPHOON_RX_ERROR) { typhoon_recycle_rx_skb(tp, idx); continue; } pkt_len = le16_to_cpu(rx->frameLen); - if(pkt_len < rx_copybreak && + if (pkt_len < rx_copybreak && (new_skb = netdev_alloc_skb(tp->dev, pkt_len + 2)) != NULL) { skb_reserve(new_skb, 2); pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, @@ -1684,7 +1684,7 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile __le32 * read new_skb->protocol = eth_type_trans(new_skb, tp->dev); csum_bits = rx->rxStatus & (TYPHOON_RX_IP_CHK_GOOD | TYPHOON_RX_UDP_CHK_GOOD | TYPHOON_RX_TCP_CHK_GOOD); - if(csum_bits == + if (csum_bits == (TYPHOON_RX_IP_CHK_GOOD | TYPHOON_RX_TCP_CHK_GOOD) || csum_bits == (TYPHOON_RX_IP_CHK_GOOD | TYPHOON_RX_UDP_CHK_GOOD)) { @@ -1710,11 +1710,11 @@ typhoon_fill_free_ring(struct typhoon *tp) { u32 i; - for(i = 0; i < RXENT_ENTRIES; i++) { + for (i = 0; i < RXENT_ENTRIES; i++) { struct rxbuff_ent *rxb = &tp->rxbuffers[i]; - if(rxb->skb) + if (rxb->skb) continue; - if(typhoon_alloc_rx_skb(tp, i) < 0) + if (typhoon_alloc_rx_skb(tp, i) < 0) break; } } @@ -1727,25 +1727,25 @@ typhoon_poll(struct napi_struct *napi, int budget) int work_done; rmb(); - if(!tp->awaiting_resp && indexes->respReady != indexes->respCleared) + if (!tp->awaiting_resp && indexes->respReady != indexes->respCleared) typhoon_process_response(tp, 0, NULL); - if(le32_to_cpu(indexes->txLoCleared) != tp->txLoRing.lastRead) + if (le32_to_cpu(indexes->txLoCleared) != tp->txLoRing.lastRead) typhoon_tx_complete(tp, &tp->txLoRing, &indexes->txLoCleared); work_done = 0; - if(indexes->rxHiCleared != indexes->rxHiReady) { + if (indexes->rxHiCleared != indexes->rxHiReady) { work_done += typhoon_rx(tp, &tp->rxHiRing, &indexes->rxHiReady, &indexes->rxHiCleared, budget); } - if(indexes->rxLoCleared != indexes->rxLoReady) { + if (indexes->rxLoCleared != indexes->rxLoReady) { work_done += typhoon_rx(tp, &tp->rxLoRing, &indexes->rxLoReady, &indexes->rxLoCleared, budget - work_done); } - if(le32_to_cpu(indexes->rxBuffCleared) == tp->rxBuffRing.lastWrite) { + if (le32_to_cpu(indexes->rxBuffCleared) == tp->rxBuffRing.lastWrite) { /* rxBuff ring is empty, try to fill it. */ typhoon_fill_free_ring(tp); } @@ -1769,7 +1769,7 @@ typhoon_interrupt(int irq, void *dev_instance) u32 intr_status; intr_status = ioread32(ioaddr + TYPHOON_REG_INTR_STATUS); - if(!(intr_status & TYPHOON_INTR_HOST_INT)) + if (!(intr_status & TYPHOON_INTR_HOST_INT)) return IRQ_NONE; iowrite32(intr_status, ioaddr + TYPHOON_REG_INTR_STATUS); @@ -1789,9 +1789,9 @@ typhoon_free_rx_rings(struct typhoon *tp) { u32 i; - for(i = 0; i < RXENT_ENTRIES; i++) { + for (i = 0; i < RXENT_ENTRIES; i++) { struct rxbuff_ent *rxb = &tp->rxbuffers[i]; - if(rxb->skb) { + if (rxb->skb) { pci_unmap_single(tp->pdev, rxb->dma_addr, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); dev_kfree_skb(rxb->skb); @@ -1811,7 +1811,7 @@ typhoon_sleep(struct typhoon *tp, pci_power_t state, __le16 events) INIT_COMMAND_WITH_RESPONSE(&xp_cmd, TYPHOON_CMD_ENABLE_WAKE_EVENTS); xp_cmd.parm1 = events; err = typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(err < 0) { + if (err < 0) { netdev_err(tp->dev, "typhoon_sleep(): wake events cmd err %d\n", err); return err; @@ -1819,12 +1819,12 @@ typhoon_sleep(struct typhoon *tp, pci_power_t state, __le16 events) INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_GOTO_SLEEP); err = typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(err < 0) { + if (err < 0) { netdev_err(tp->dev, "typhoon_sleep(): sleep cmd err %d\n", err); return err; } - if(typhoon_wait_status(ioaddr, TYPHOON_STATUS_SLEEPING) < 0) + if (typhoon_wait_status(ioaddr, TYPHOON_STATUS_SLEEPING) < 0) return -ETIMEDOUT; /* Since we cannot monitor the status of the link while sleeping, @@ -1851,7 +1851,7 @@ typhoon_wakeup(struct typhoon *tp, int wait_type) * the old firmware pay for the reset. */ iowrite32(TYPHOON_BOOTCMD_WAKEUP, ioaddr + TYPHOON_REG_COMMAND); - if(typhoon_wait_status(ioaddr, TYPHOON_STATUS_WAITING_FOR_HOST) < 0 || + if (typhoon_wait_status(ioaddr, TYPHOON_STATUS_WAITING_FOR_HOST) < 0 || (tp->capabilities & TYPHOON_WAKEUP_NEEDS_RESET)) return typhoon_reset(ioaddr, wait_type); @@ -1870,12 +1870,12 @@ typhoon_start_runtime(struct typhoon *tp) typhoon_fill_free_ring(tp); err = typhoon_download_firmware(tp); - if(err < 0) { + if (err < 0) { netdev_err(tp->dev, "cannot load runtime on 3XP\n"); goto error_out; } - if(typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_BOOT) < 0) { + if (typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_BOOT) < 0) { netdev_err(tp->dev, "cannot boot 3XP\n"); err = -EIO; goto error_out; @@ -1884,14 +1884,14 @@ typhoon_start_runtime(struct typhoon *tp) INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_SET_MAX_PKT_SIZE); xp_cmd.parm1 = cpu_to_le16(PKT_BUF_SZ); err = typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(err < 0) + if (err < 0) goto error_out; INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_SET_MAC_ADDRESS); xp_cmd.parm1 = cpu_to_le16(ntohs(*(__be16 *)&dev->dev_addr[0])); xp_cmd.parm2 = cpu_to_le32(ntohl(*(__be32 *)&dev->dev_addr[2])); err = typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(err < 0) + if (err < 0) goto error_out; /* Disable IRQ coalescing -- we can reenable it when 3Com gives @@ -1900,38 +1900,38 @@ typhoon_start_runtime(struct typhoon *tp) INIT_COMMAND_WITH_RESPONSE(&xp_cmd, TYPHOON_CMD_IRQ_COALESCE_CTRL); xp_cmd.parm1 = 0; err = typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(err < 0) + if (err < 0) goto error_out; INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_XCVR_SELECT); xp_cmd.parm1 = tp->xcvr_select; err = typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(err < 0) + if (err < 0) goto error_out; INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_VLAN_TYPE_WRITE); xp_cmd.parm1 = cpu_to_le16(ETH_P_8021Q); err = typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(err < 0) + if (err < 0) goto error_out; INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_SET_OFFLOAD_TASKS); xp_cmd.parm2 = tp->offload; xp_cmd.parm3 = tp->offload; err = typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(err < 0) + if (err < 0) goto error_out; typhoon_set_rx_mode(dev); INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_TX_ENABLE); err = typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(err < 0) + if (err < 0) goto error_out; INIT_COMMAND_WITH_RESPONSE(&xp_cmd, TYPHOON_CMD_RX_ENABLE); err = typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(err < 0) + if (err < 0) goto error_out; tp->card_state = Running; @@ -1971,13 +1971,13 @@ typhoon_stop_runtime(struct typhoon *tp, int wait_type) /* Wait 1/2 sec for any outstanding transmits to occur * We'll cleanup after the reset if this times out. */ - for(i = 0; i < TYPHOON_WAIT_TIMEOUT; i++) { - if(indexes->txLoCleared == cpu_to_le32(txLo->lastWrite)) + for (i = 0; i < TYPHOON_WAIT_TIMEOUT; i++) { + if (indexes->txLoCleared == cpu_to_le32(txLo->lastWrite)) break; udelay(TYPHOON_UDELAY); } - if(i == TYPHOON_WAIT_TIMEOUT) + if (i == TYPHOON_WAIT_TIMEOUT) netdev_err(tp->dev, "halt timed out waiting for Tx to complete\n"); INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_TX_DISABLE); @@ -1994,16 +1994,16 @@ typhoon_stop_runtime(struct typhoon *tp, int wait_type) INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_HALT); typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL); - if(typhoon_wait_status(ioaddr, TYPHOON_STATUS_HALTED) < 0) + if (typhoon_wait_status(ioaddr, TYPHOON_STATUS_HALTED) < 0) netdev_err(tp->dev, "timed out waiting for 3XP to halt\n"); - if(typhoon_reset(ioaddr, wait_type) < 0) { + if (typhoon_reset(ioaddr, wait_type) < 0) { netdev_err(tp->dev, "unable to reset 3XP\n"); return -ETIMEDOUT; } /* cleanup any outstanding Tx packets */ - if(indexes->txLoCleared != cpu_to_le32(txLo->lastWrite)) { + if (indexes->txLoCleared != cpu_to_le32(txLo->lastWrite)) { indexes->txLoCleared = cpu_to_le32(txLo->lastWrite); typhoon_clean_tx(tp, &tp->txLoRing, &indexes->txLoCleared); } @@ -2016,7 +2016,7 @@ typhoon_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct typhoon *tp = netdev_priv(dev); - if(typhoon_reset(tp->ioaddr, WaitNoSleep) < 0) { + if (typhoon_reset(tp->ioaddr, WaitNoSleep) < 0) { netdev_warn(dev, "could not reset in tx timeout\n"); goto truly_dead; } @@ -2025,7 +2025,7 @@ typhoon_tx_timeout(struct net_device *dev, unsigned int txqueue) typhoon_clean_tx(tp, &tp->txLoRing, &tp->indexes->txLoCleared); typhoon_free_rx_rings(tp); - if(typhoon_start_runtime(tp) < 0) { + if (typhoon_start_runtime(tp) < 0) { netdev_err(dev, "could not start runtime in tx timeout\n"); goto truly_dead; } @@ -2050,20 +2050,20 @@ typhoon_open(struct net_device *dev) goto out; err = typhoon_wakeup(tp, WaitSleep); - if(err < 0) { + if (err < 0) { netdev_err(dev, "unable to wakeup device\n"); goto out_sleep; } err = request_irq(dev->irq, typhoon_interrupt, IRQF_SHARED, dev->name, dev); - if(err < 0) + if (err < 0) goto out_sleep; napi_enable(&tp->napi); err = typhoon_start_runtime(tp); - if(err < 0) { + if (err < 0) { napi_disable(&tp->napi); goto out_irq; } @@ -2075,13 +2075,13 @@ out_irq: free_irq(dev->irq, dev); out_sleep: - if(typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_HOST) < 0) { + if (typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_HOST) < 0) { netdev_err(dev, "unable to reboot into sleep img\n"); typhoon_reset(tp->ioaddr, NoWait); goto out; } - if(typhoon_sleep(tp, PCI_D3hot, 0) < 0) + if (typhoon_sleep(tp, PCI_D3hot, 0) < 0) netdev_err(dev, "unable to go back to sleep\n"); out: @@ -2096,7 +2096,7 @@ typhoon_close(struct net_device *dev) netif_stop_queue(dev); napi_disable(&tp->napi); - if(typhoon_stop_runtime(tp, WaitSleep) < 0) + if (typhoon_stop_runtime(tp, WaitSleep) < 0) netdev_err(dev, "unable to stop runtime\n"); /* Make sure there is no irq handler running on a different CPU. */ @@ -2105,10 +2105,10 @@ typhoon_close(struct net_device *dev) typhoon_free_rx_rings(tp); typhoon_init_rings(tp); - if(typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_HOST) < 0) + if (typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_HOST) < 0) netdev_err(dev, "unable to boot sleep image\n"); - if(typhoon_sleep(tp, PCI_D3hot, 0) < 0) + if (typhoon_sleep(tp, PCI_D3hot, 0) < 0) netdev_err(dev, "unable to put card to sleep\n"); return 0; @@ -2123,15 +2123,15 @@ typhoon_resume(struct pci_dev *pdev) /* If we're down, resume when we are upped. */ - if(!netif_running(dev)) + if (!netif_running(dev)) return 0; - if(typhoon_wakeup(tp, WaitNoSleep) < 0) { + if (typhoon_wakeup(tp, WaitNoSleep) < 0) { netdev_err(dev, "critical: could not wake up in resume\n"); goto reset; } - if(typhoon_start_runtime(tp) < 0) { + if (typhoon_start_runtime(tp) < 0) { netdev_err(dev, "critical: could not start runtime in resume\n"); goto reset; } @@ -2153,16 +2153,16 @@ typhoon_suspend(struct pci_dev *pdev, pm_message_t state) /* If we're down, we're already suspended. */ - if(!netif_running(dev)) + if (!netif_running(dev)) return 0; /* TYPHOON_OFFLOAD_VLAN is always on now, so this doesn't work */ - if(tp->wol_events & TYPHOON_WAKE_MAGIC_PKT) + if (tp->wol_events & TYPHOON_WAKE_MAGIC_PKT) netdev_warn(dev, "cannot do WAKE_MAGIC with VLAN offloading\n"); netif_device_detach(dev); - if(typhoon_stop_runtime(tp, WaitNoSleep) < 0) { + if (typhoon_stop_runtime(tp, WaitNoSleep) < 0) { netdev_err(dev, "unable to stop runtime\n"); goto need_resume; } @@ -2170,7 +2170,7 @@ typhoon_suspend(struct pci_dev *pdev, pm_message_t state) typhoon_free_rx_rings(tp); typhoon_init_rings(tp); - if(typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_HOST) < 0) { + if (typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_HOST) < 0) { netdev_err(dev, "unable to boot sleep image\n"); goto need_resume; } @@ -2178,19 +2178,19 @@ typhoon_suspend(struct pci_dev *pdev, pm_message_t state) INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_SET_MAC_ADDRESS); xp_cmd.parm1 = cpu_to_le16(ntohs(*(__be16 *)&dev->dev_addr[0])); xp_cmd.parm2 = cpu_to_le32(ntohl(*(__be32 *)&dev->dev_addr[2])); - if(typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL) < 0) { + if (typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL) < 0) { netdev_err(dev, "unable to set mac address in suspend\n"); goto need_resume; } INIT_COMMAND_NO_RESPONSE(&xp_cmd, TYPHOON_CMD_SET_RX_FILTER); xp_cmd.parm1 = TYPHOON_RX_FILTER_DIRECTED | TYPHOON_RX_FILTER_BROADCAST; - if(typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL) < 0) { + if (typhoon_issue_command(tp, 1, &xp_cmd, 0, NULL) < 0) { netdev_err(dev, "unable to set rx filter in suspend\n"); goto need_resume; } - if(typhoon_sleep(tp, pci_choose_state(pdev, state), tp->wol_events) < 0) { + if (typhoon_sleep(tp, pci_choose_state(pdev, state), tp->wol_events) < 0) { netdev_err(dev, "unable to put card to sleep\n"); goto need_resume; } @@ -2210,10 +2210,10 @@ typhoon_test_mmio(struct pci_dev *pdev) int mode = 0; u32 val; - if(!ioaddr) + if (!ioaddr) goto out; - if(ioread32(ioaddr + TYPHOON_REG_STATUS) != + if (ioread32(ioaddr + TYPHOON_REG_STATUS) != TYPHOON_STATUS_WAITING_FOR_HOST) goto out_unmap; @@ -2226,12 +2226,12 @@ typhoon_test_mmio(struct pci_dev *pdev) * The 50usec delay is arbitrary -- it could probably be smaller. */ val = ioread32(ioaddr + TYPHOON_REG_INTR_STATUS); - if((val & TYPHOON_INTR_SELF) == 0) { + if ((val & TYPHOON_INTR_SELF) == 0) { iowrite32(1, ioaddr + TYPHOON_REG_SELF_INTERRUPT); ioread32(ioaddr + TYPHOON_REG_INTR_STATUS); udelay(50); val = ioread32(ioaddr + TYPHOON_REG_INTR_STATUS); - if(val & TYPHOON_INTR_SELF) + if (val & TYPHOON_INTR_SELF) mode = 1; } @@ -2244,7 +2244,7 @@ out_unmap: pci_iounmap(pdev, ioaddr); out: - if(!mode) + if (!mode) pr_info("%s: falling back to port IO\n", pci_name(pdev)); return mode; } @@ -2275,7 +2275,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) const char *err_msg; dev = alloc_etherdev(sizeof(*tp)); - if(dev == NULL) { + if (dev == NULL) { err_msg = "unable to alloc new net device"; err = -ENOMEM; goto error_out; @@ -2283,55 +2283,55 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) SET_NETDEV_DEV(dev, &pdev->dev); err = pci_enable_device(pdev); - if(err < 0) { + if (err < 0) { err_msg = "unable to enable device"; goto error_out_dev; } err = pci_set_mwi(pdev); - if(err < 0) { + if (err < 0) { err_msg = "unable to set MWI"; goto error_out_disable; } err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if(err < 0) { + if (err < 0) { err_msg = "No usable DMA configuration"; goto error_out_mwi; } /* sanity checks on IO and MMIO BARs */ - if(!(pci_resource_flags(pdev, 0) & IORESOURCE_IO)) { + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_IO)) { err_msg = "region #1 not a PCI IO resource, aborting"; err = -ENODEV; goto error_out_mwi; } - if(pci_resource_len(pdev, 0) < 128) { + if (pci_resource_len(pdev, 0) < 128) { err_msg = "Invalid PCI IO region size, aborting"; err = -ENODEV; goto error_out_mwi; } - if(!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { + if (!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { err_msg = "region #1 not a PCI MMIO resource, aborting"; err = -ENODEV; goto error_out_mwi; } - if(pci_resource_len(pdev, 1) < 128) { + if (pci_resource_len(pdev, 1) < 128) { err_msg = "Invalid PCI MMIO region size, aborting"; err = -ENODEV; goto error_out_mwi; } err = pci_request_regions(pdev, KBUILD_MODNAME); - if(err < 0) { + if (err < 0) { err_msg = "could not request regions"; goto error_out_mwi; } /* map our registers */ - if(use_mmio != 0 && use_mmio != 1) + if (use_mmio != 0 && use_mmio != 1) use_mmio = typhoon_test_mmio(pdev); ioaddr = pci_iomap(pdev, use_mmio, 128); @@ -2345,7 +2345,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) */ shared = pci_alloc_consistent(pdev, sizeof(struct typhoon_shared), &shared_dma); - if(!shared) { + if (!shared) { err_msg = "could not allocate DMA memory"; err = -ENOMEM; goto error_out_remap; @@ -2425,7 +2425,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) * seem to need a little extra help to get started. Since we don't * know how to nudge it along, just kick it. */ - if(xp_resp[0].numDesc != 0) + if (xp_resp[0].numDesc != 0) tp->capabilities |= TYPHOON_WAKEUP_NEEDS_RESET; err = typhoon_sleep(tp, PCI_D3hot, 0); @@ -2470,14 +2470,14 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* xp_resp still contains the response to the READ_VERSIONS command. * For debugging, let the user know what version he has. */ - if(xp_resp[0].numDesc == 0) { + if (xp_resp[0].numDesc == 0) { /* This is the Typhoon 1.0 type Sleep Image, last 16 bits * of version is Month/Day of build. */ u16 monthday = le32_to_cpu(xp_resp[0].parm2) & 0xffff; netdev_info(dev, "Typhoon 1.0 Sleep Image built %02u/%02u/2000\n", monthday >> 8, monthday & 0xff); - } else if(xp_resp[0].numDesc == 2) { + } else if (xp_resp[0].numDesc == 2) { /* This is the Typhoon 1.1+ type Sleep Image */ u32 sleep_ver = le32_to_cpu(xp_resp[0].parm2); diff --git a/drivers/net/ethernet/3com/typhoon.h b/drivers/net/ethernet/3com/typhoon.h index 88187fc84aa3..2f634c64d5d1 100644 --- a/drivers/net/ethernet/3com/typhoon.h +++ b/drivers/net/ethernet/3com/typhoon.h @@ -366,7 +366,7 @@ struct resp_desc { memset(_ptr, 0, sizeof(struct cmd_desc)); \ _ptr->flags = TYPHOON_CMD_DESC | TYPHOON_DESC_VALID; \ _ptr->cmd = command; \ - } while(0) + } while (0) /* We set seqNo to 1 if we're expecting a response from this command */ #define INIT_COMMAND_WITH_RESPONSE(x, command) \ @@ -376,7 +376,7 @@ struct resp_desc { _ptr->flags |= TYPHOON_DESC_VALID; \ _ptr->cmd = command; \ _ptr->seqNo = 1; \ - } while(0) + } while (0) /* TYPHOON_CMD_SET_RX_FILTER filter bits (cmd.parm1) */ diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 552d4cbf6dbd..9cc28b4b2627 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -221,7 +221,7 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) snprintf(*data, ETH_GSTRING_LEN, "queue_%u_tx_%s", i, ena_stats->name); - (*data) += ETH_GSTRING_LEN; + (*data) += ETH_GSTRING_LEN; } /* Rx stats */ for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index bea2dbc0e469..af7ce5c5488c 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2133,7 +2133,7 @@ static int bcm_sysport_rule_set(struct bcm_sysport_priv *priv, return -ENOSPC; index = find_first_zero_bit(priv->filters, RXCHK_BRCM_TAG_MAX); - if (index > RXCHK_BRCM_TAG_MAX) + if (index >= RXCHK_BRCM_TAG_MAX) return -ENOSPC; /* Location is the classification ID, and index is the position diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4c9696a3978a..663dcf614004 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10955,13 +10955,13 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) struct bnxt *bp = netdev_priv(dev); if (netif_running(dev)) - bnxt_close_nic(bp, false, false); + bnxt_close_nic(bp, true, false); dev->mtu = new_mtu; bnxt_set_ring_params(bp); if (netif_running(dev)) - return bnxt_open_nic(bp, false, false); + return bnxt_open_nic(bp, true, false); return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index cc807ba6f163..677bab95b937 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2006,8 +2006,8 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename, struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr; struct hwrm_nvm_install_update_input install = {0}; const struct firmware *fw; - int rc, hwrm_err = 0; u32 item_len; + int rc = 0; u16 index; bnxt_hwrm_fw_set_time(bp); @@ -2051,15 +2051,14 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename, memcpy(kmem, fw->data, fw->size); modify.host_src_addr = cpu_to_le64(dma_handle); - hwrm_err = hwrm_send_message(bp, &modify, - sizeof(modify), - FLASH_PACKAGE_TIMEOUT); + rc = hwrm_send_message(bp, &modify, sizeof(modify), + FLASH_PACKAGE_TIMEOUT); dma_free_coherent(&bp->pdev->dev, fw->size, kmem, dma_handle); } } release_firmware(fw); - if (rc || hwrm_err) + if (rc) goto err_exit; if ((install_type & 0xffff) == 0) @@ -2068,20 +2067,19 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename, install.install_type = cpu_to_le32(install_type); mutex_lock(&bp->hwrm_cmd_lock); - hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), - INSTALL_PACKAGE_TIMEOUT); - if (hwrm_err) { + rc = _hwrm_send_message(bp, &install, sizeof(install), + INSTALL_PACKAGE_TIMEOUT); + if (rc) { u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err; if (resp->error_code && error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { install.flags |= cpu_to_le16( NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); - hwrm_err = _hwrm_send_message(bp, &install, - sizeof(install), - INSTALL_PACKAGE_TIMEOUT); + rc = _hwrm_send_message(bp, &install, sizeof(install), + INSTALL_PACKAGE_TIMEOUT); } - if (hwrm_err) + if (rc) goto flash_pkg_exit; } @@ -2093,7 +2091,7 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename, flash_pkg_exit: mutex_unlock(&bp->hwrm_cmd_lock); err_exit: - if (hwrm_err == -EACCES) + if (rc == -EACCES) bnxt_print_admin_err(bp); return rc; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 523bf4be43cc..b19be7549aad 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -300,7 +300,7 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, return -EINVAL; } - if (!flow_action_basic_hw_stats_types_check(flow_action, extack)) + if (!flow_action_basic_hw_stats_check(flow_action, extack)) return -EOPNOTSUPP; flow_action_for_each(i, act, flow_action) { diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index a8d9ec927627..66d31c018c7e 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1375,7 +1375,6 @@ static int octeon_chip_specific_setup(struct octeon_device *oct) { u32 dev_id, rev_id; int ret = 1; - char *s; pci_read_config_dword(oct->pci_dev, 0, &dev_id); pci_read_config_dword(oct->pci_dev, 8, &rev_id); @@ -1385,13 +1384,11 @@ static int octeon_chip_specific_setup(struct octeon_device *oct) case OCTEON_CN68XX_PCIID: oct->chip_id = OCTEON_CN68XX; ret = lio_setup_cn68xx_octeon_device(oct); - s = "CN68XX"; break; case OCTEON_CN66XX_PCIID: oct->chip_id = OCTEON_CN66XX; ret = lio_setup_cn66xx_octeon_device(oct); - s = "CN66XX"; break; case OCTEON_CN23XX_PCIID_PF: @@ -1404,11 +1401,9 @@ static int octeon_chip_specific_setup(struct octeon_device *oct) pci_sriov_set_totalvfs(oct->pci_dev, oct->sriov_info.max_vfs); #endif - s = "CN23XX"; break; default: - s = "?"; dev_err(&oct->pci_dev->dev, "Unknown device found (dev_id: %x)\n", dev_id); } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 4ab57d33a87e..069e7413f1ef 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1179,13 +1179,12 @@ void nicvf_sq_disable(struct nicvf *nic, int qidx) void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, int qidx) { - u64 head, tail; + u64 head; struct sk_buff *skb; struct nicvf *nic = netdev_priv(netdev); struct sq_hdr_subdesc *hdr; head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4; - tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4; while (sq->head != head) { hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head); if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 2a2938bbb93a..e8852dfcc1f1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -438,13 +438,118 @@ int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, return get_filter_count(adapter, fidx, hitcnt, bytecnt, hash); } -int cxgb4_get_free_ftid(struct net_device *dev, int family) +static bool cxgb4_filter_prio_in_range(struct tid_info *t, u32 idx, u8 nslots, + u32 prio) +{ + struct filter_entry *prev_tab, *next_tab, *prev_fe, *next_fe; + u32 prev_ftid, next_ftid; + + /* Only insert the rule if both of the following conditions + * are met: + * 1. The immediate previous rule has priority <= @prio. + * 2. The immediate next rule has priority >= @prio. + */ + + /* High Priority (HPFILTER) region always has higher priority + * than normal FILTER region. So, all rules in HPFILTER region + * must have prio value <= rules in normal FILTER region. + */ + if (idx < t->nhpftids) { + /* Don't insert if there's a rule already present at @idx + * in HPFILTER region. + */ + if (test_bit(idx, t->hpftid_bmap)) + return false; + + next_tab = t->hpftid_tab; + next_ftid = find_next_bit(t->hpftid_bmap, t->nhpftids, idx); + if (next_ftid >= t->nhpftids) { + /* No next entry found in HPFILTER region. + * See if there's any next entry in normal + * FILTER region. + */ + next_ftid = find_first_bit(t->ftid_bmap, t->nftids); + if (next_ftid >= t->nftids) + next_ftid = idx; + else + next_tab = t->ftid_tab; + } + + /* Search for the closest previous filter entry in HPFILTER + * region. No need to search in normal FILTER region because + * there can never be any entry in normal FILTER region whose + * prio value is < last entry in HPFILTER region. + */ + prev_ftid = find_last_bit(t->hpftid_bmap, idx); + if (prev_ftid >= idx) + prev_ftid = idx; + + prev_tab = t->hpftid_tab; + } else { + idx -= t->nhpftids; + + /* Don't insert if there's a rule already present at @idx + * in normal FILTER region. + */ + if (test_bit(idx, t->ftid_bmap)) + return false; + + prev_tab = t->ftid_tab; + prev_ftid = find_last_bit(t->ftid_bmap, idx); + if (prev_ftid >= idx) { + /* No previous entry found in normal FILTER + * region. See if there's any previous entry + * in HPFILTER region. + */ + prev_ftid = find_last_bit(t->hpftid_bmap, t->nhpftids); + if (prev_ftid >= t->nhpftids) + prev_ftid = idx; + else + prev_tab = t->hpftid_tab; + } + + /* Search for the closest next filter entry in normal + * FILTER region. No need to search in HPFILTER region + * because there can never be any entry in HPFILTER + * region whose prio value is > first entry in normal + * FILTER region. + */ + next_ftid = find_next_bit(t->ftid_bmap, t->nftids, idx); + if (next_ftid >= t->nftids) + next_ftid = idx; + + next_tab = t->ftid_tab; + } + + next_fe = &next_tab[next_ftid]; + + /* See if the filter entry belongs to an IPv6 rule, which + * occupy 4 slots on T5 and 2 slots on T6. Adjust the + * reference to the previously inserted filter entry + * accordingly. + */ + prev_fe = &prev_tab[prev_ftid & ~(nslots - 1)]; + if (!prev_fe->fs.type) + prev_fe = &prev_tab[prev_ftid]; + + if ((prev_fe->valid && prev_fe->fs.tc_prio > prio) || + (next_fe->valid && next_fe->fs.tc_prio < prio)) + return false; + + return true; +} + +int cxgb4_get_free_ftid(struct net_device *dev, u8 family, bool hash_en, + u32 tc_prio) { struct adapter *adap = netdev2adap(dev); struct tid_info *t = &adap->tids; + struct filter_entry *tab, *f; + u32 bmap_ftid, max_ftid; + unsigned long *bmap; bool found = false; - u8 i, n, cnt; - int ftid; + u8 i, cnt, n; + int ftid = 0; /* IPv4 occupy 1 slot. IPv6 occupy 2 slots on T6 and 4 slots * on T5. @@ -456,34 +561,129 @@ int cxgb4_get_free_ftid(struct net_device *dev, int family) n += 2; } - if (n > t->nftids) - return -ENOMEM; - - /* Find free filter slots from the end of TCAM. Appropriate - * checks must be done by caller later to ensure the prio - * passed by TC doesn't conflict with prio saved by existing - * rules in the TCAM. + /* There are 3 filter regions available in hardware in + * following order of priority: + * + * 1. High Priority (HPFILTER) region (Highest Priority). + * 2. HASH region. + * 3. Normal FILTER region (Lowest Priority). + * + * Entries in HPFILTER and normal FILTER region have index + * 0 as the highest priority and the rules will be scanned + * in ascending order until either a rule hits or end of + * the region is reached. + * + * All HASH region entries have same priority. The set of + * fields to match in headers are pre-determined. The same + * set of header match fields must be compulsorily specified + * in all the rules wanting to get inserted in HASH region. + * Hence, HASH region is an exact-match region. A HASH is + * generated for a rule based on the values in the + * pre-determined set of header match fields. The generated + * HASH serves as an index into the HASH region. There can + * never be 2 rules having the same HASH. Hardware will + * compute a HASH for every incoming packet based on the + * values in the pre-determined set of header match fields + * and uses it as an index to check if there's a rule + * inserted in the HASH region at the specified index. If + * there's a rule inserted, then it's considered as a filter + * hit. Otherwise, it's a filter miss and normal FILTER region + * is scanned afterwards. */ + spin_lock_bh(&t->ftid_lock); - ftid = t->nftids - 1; - while (ftid >= n - 1) { + + ftid = (tc_prio <= t->nhpftids) ? 0 : t->nhpftids; + max_ftid = t->nftids + t->nhpftids; + while (ftid < max_ftid) { + if (ftid < t->nhpftids) { + /* If the new rule wants to get inserted into + * HPFILTER region, but its prio is greater + * than the rule with the highest prio in HASH + * region, then reject the rule. + */ + if (t->tc_hash_tids_max_prio && + tc_prio > t->tc_hash_tids_max_prio) + break; + + /* If there's not enough slots available + * in HPFILTER region, then move on to + * normal FILTER region immediately. + */ + if (ftid + n > t->nhpftids) { + ftid = t->nhpftids; + continue; + } + + bmap = t->hpftid_bmap; + bmap_ftid = ftid; + tab = t->hpftid_tab; + } else if (hash_en) { + /* Ensure priority is >= last rule in HPFILTER + * region. + */ + ftid = find_last_bit(t->hpftid_bmap, t->nhpftids); + if (ftid < t->nhpftids) { + f = &t->hpftid_tab[ftid]; + if (f->valid && tc_prio < f->fs.tc_prio) + break; + } + + /* Ensure priority is <= first rule in normal + * FILTER region. + */ + ftid = find_first_bit(t->ftid_bmap, t->nftids); + if (ftid < t->nftids) { + f = &t->ftid_tab[ftid]; + if (f->valid && tc_prio > f->fs.tc_prio) + break; + } + + found = true; + ftid = t->nhpftids; + goto out_unlock; + } else { + /* If the new rule wants to get inserted into + * normal FILTER region, but its prio is less + * than the rule with the highest prio in HASH + * region, then reject the rule. + */ + if (t->tc_hash_tids_max_prio && + tc_prio < t->tc_hash_tids_max_prio) + break; + + if (ftid + n > max_ftid) + break; + + bmap = t->ftid_bmap; + bmap_ftid = ftid - t->nhpftids; + tab = t->ftid_tab; + } + cnt = 0; for (i = 0; i < n; i++) { - if (test_bit(ftid - i, t->ftid_bmap)) + if (test_bit(bmap_ftid + i, bmap)) break; cnt++; } + if (cnt == n) { - ftid &= ~(n - 1); - found = true; - break; + /* Ensure the new rule's prio doesn't conflict + * with existing rules. + */ + if (cxgb4_filter_prio_in_range(t, ftid, n, + tc_prio)) { + ftid &= ~(n - 1); + found = true; + break; + } } - ftid -= n; + ftid += n; } - spin_unlock_bh(&t->ftid_lock); - ftid += t->nhpftids; +out_unlock: + spin_unlock_bh(&t->ftid_lock); return found ? ftid : -ENOMEM; } @@ -555,73 +755,6 @@ static void cxgb4_clear_hpftid(struct tid_info *t, int fidx, int family) spin_unlock_bh(&t->ftid_lock); } -bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio) -{ - struct filter_entry *prev_fe, *next_fe, *tab; - struct adapter *adap = netdev2adap(dev); - u32 prev_ftid, next_ftid, max_tid; - struct tid_info *t = &adap->tids; - unsigned long *bmap; - bool valid = true; - - if (idx < t->nhpftids) { - bmap = t->hpftid_bmap; - tab = t->hpftid_tab; - max_tid = t->nhpftids; - } else { - idx -= t->nhpftids; - bmap = t->ftid_bmap; - tab = t->ftid_tab; - max_tid = t->nftids; - } - - /* Only insert the rule if both of the following conditions - * are met: - * 1. The immediate previous rule has priority <= @prio. - * 2. The immediate next rule has priority >= @prio. - */ - spin_lock_bh(&t->ftid_lock); - - /* Don't insert if there's a rule already present at @idx. */ - if (test_bit(idx, bmap)) { - valid = false; - goto out_unlock; - } - - next_ftid = find_next_bit(bmap, max_tid, idx); - if (next_ftid >= max_tid) - next_ftid = idx; - - next_fe = &tab[next_ftid]; - - prev_ftid = find_last_bit(bmap, idx); - if (prev_ftid >= idx) - prev_ftid = idx; - - /* See if the filter entry belongs to an IPv6 rule, which - * occupy 4 slots on T5 and 2 slots on T6. Adjust the - * reference to the previously inserted filter entry - * accordingly. - */ - if (CHELSIO_CHIP_VERSION(adap->params.chip) < CHELSIO_T6) { - prev_fe = &tab[prev_ftid & ~0x3]; - if (!prev_fe->fs.type) - prev_fe = &tab[prev_ftid]; - } else { - prev_fe = &tab[prev_ftid & ~0x1]; - if (!prev_fe->fs.type) - prev_fe = &tab[prev_ftid]; - } - - if ((prev_fe->valid && prio < prev_fe->fs.tc_prio) || - (next_fe->valid && prio > next_fe->fs.tc_prio)) - valid = false; - -out_unlock: - spin_unlock_bh(&t->ftid_lock); - return valid; -} - /* Delete the filter at a specified index. */ static int del_filter_wr(struct adapter *adapter, int fidx) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index b3e4a645043d..b0751c0611ec 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -53,5 +53,4 @@ void clear_all_filters(struct adapter *adapter); void init_hash_filter(struct adapter *adap); bool is_filter_exact_match(struct adapter *adap, struct ch_filter_specification *fs); -bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio); #endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 3da25a2b5cc7..75fde0d4d493 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5373,12 +5373,11 @@ static inline bool is_x_10g_port(const struct link_config *lc) static int cfg_queues(struct adapter *adap) { u32 avail_qsets, avail_eth_qsets, avail_uld_qsets; + u32 i, n10g = 0, qidx = 0, n1g = 0; + u32 ncpus = num_online_cpus(); u32 niqflint, neq, num_ulds; struct sge *s = &adap->sge; - u32 i, n10g = 0, qidx = 0; -#ifndef CONFIG_CHELSIO_T4_DCB - int q10g = 0; -#endif + u32 q10g = 0, q1g; /* Reduce memory usage in kdump environment, disable all offload. */ if (is_kdump_kernel() || (is_uld(adap) && t4_uld_mem_alloc(adap))) { @@ -5416,44 +5415,50 @@ static int cfg_queues(struct adapter *adap) n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg); avail_eth_qsets = min_t(u32, avail_qsets, MAX_ETH_QSETS); + + /* We default to 1 queue per non-10G port and up to # of cores queues + * per 10G port. + */ + if (n10g) + q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g; + + n1g = adap->params.nports - n10g; #ifdef CONFIG_CHELSIO_T4_DCB /* For Data Center Bridging support we need to be able to support up * to 8 Traffic Priorities; each of which will be assigned to its * own TX Queue in order to prevent Head-Of-Line Blocking. */ + q1g = 8; if (adap->params.nports * 8 > avail_eth_qsets) { dev_err(adap->pdev_dev, "DCB avail_eth_qsets=%d < %d!\n", avail_eth_qsets, adap->params.nports * 8); return -ENOMEM; } - for_each_port(adap, i) { - struct port_info *pi = adap2pinfo(adap, i); + if (adap->params.nports * ncpus < avail_eth_qsets) + q10g = max(8U, ncpus); + else + q10g = max(8U, q10g); - pi->first_qset = qidx; - pi->nqsets = is_kdump_kernel() ? 1 : 8; - qidx += pi->nqsets; - } -#else /* !CONFIG_CHELSIO_T4_DCB */ - /* We default to 1 queue per non-10G port and up to # of cores queues - * per 10G port. - */ - if (n10g) - q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g; - if (q10g > netif_get_num_default_rss_queues()) - q10g = netif_get_num_default_rss_queues(); + while ((q10g * n10g) > (avail_eth_qsets - n1g * q1g)) + q10g--; - if (is_kdump_kernel()) +#else /* !CONFIG_CHELSIO_T4_DCB */ + q1g = 1; + q10g = min(q10g, ncpus); +#endif /* !CONFIG_CHELSIO_T4_DCB */ + if (is_kdump_kernel()) { q10g = 1; + q1g = 1; + } for_each_port(adap, i) { struct port_info *pi = adap2pinfo(adap, i); pi->first_qset = qidx; - pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1; + pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : q1g; qidx += pi->nqsets; } -#endif /* !CONFIG_CHELSIO_T4_DCB */ s->ethqsets = qidx; s->max_ethqsets = qidx; /* MSI-X may lower it later */ @@ -5465,7 +5470,7 @@ static int cfg_queues(struct adapter *adap) * capped by the number of available cores. */ num_ulds = adap->num_uld + adap->num_ofld_uld; - i = min_t(u32, MAX_OFLD_QSETS, num_online_cpus()); + i = min_t(u32, MAX_OFLD_QSETS, ncpus); avail_uld_qsets = roundup(i, adap->params.nports); if (avail_qsets < num_ulds * adap->params.nports) { adap->params.offload = 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index cc46277e98de..aec9b90313e7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -553,7 +553,7 @@ int cxgb4_validate_flow_actions(struct net_device *dev, bool act_vlan = false; int i; - if (!flow_action_basic_hw_stats_types_check(actions, extack)) + if (!flow_action_basic_hw_stats_check(actions, extack)) return -EOPNOTSUPP; flow_action_for_each(i, act, actions) { @@ -635,6 +635,64 @@ int cxgb4_validate_flow_actions(struct net_device *dev, return 0; } +static void cxgb4_tc_flower_hash_prio_add(struct adapter *adap, u32 tc_prio) +{ + spin_lock_bh(&adap->tids.ftid_lock); + if (adap->tids.tc_hash_tids_max_prio < tc_prio) + adap->tids.tc_hash_tids_max_prio = tc_prio; + spin_unlock_bh(&adap->tids.ftid_lock); +} + +static void cxgb4_tc_flower_hash_prio_del(struct adapter *adap, u32 tc_prio) +{ + struct tid_info *t = &adap->tids; + struct ch_tc_flower_entry *fe; + struct rhashtable_iter iter; + u32 found = 0; + + spin_lock_bh(&t->ftid_lock); + /* Bail if the current rule is not the one with the max + * prio. + */ + if (t->tc_hash_tids_max_prio != tc_prio) + goto out_unlock; + + /* Search for the next rule having the same or next lower + * max prio. + */ + rhashtable_walk_enter(&adap->flower_tbl, &iter); + do { + rhashtable_walk_start(&iter); + + fe = rhashtable_walk_next(&iter); + while (!IS_ERR_OR_NULL(fe)) { + if (fe->fs.hash && + fe->fs.tc_prio <= t->tc_hash_tids_max_prio) { + t->tc_hash_tids_max_prio = fe->fs.tc_prio; + found++; + + /* Bail if we found another rule + * having the same prio as the + * current max one. + */ + if (fe->fs.tc_prio == tc_prio) + break; + } + + fe = rhashtable_walk_next(&iter); + } + + rhashtable_walk_stop(&iter); + } while (fe == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); + + if (!found) + t->tc_hash_tids_max_prio = 0; + +out_unlock: + spin_unlock_bh(&t->ftid_lock); +} + int cxgb4_tc_flower_replace(struct net_device *dev, struct flow_cls_offload *cls) { @@ -644,6 +702,7 @@ int cxgb4_tc_flower_replace(struct net_device *dev, struct ch_tc_flower_entry *ch_flower; struct ch_filter_specification *fs; struct filter_ctx ctx; + u8 inet_family; int fidx, ret; if (cxgb4_validate_flow_actions(dev, &rule->action, extack)) @@ -664,39 +723,32 @@ int cxgb4_tc_flower_replace(struct net_device *dev, cxgb4_process_flow_actions(dev, &rule->action, fs); fs->hash = is_filter_exact_match(adap, fs); - if (fs->hash) { - fidx = 0; - } else { - u8 inet_family; + inet_family = fs->type ? PF_INET6 : PF_INET; - inet_family = fs->type ? PF_INET6 : PF_INET; - - /* Note that TC uses prio 0 to indicate stack to - * generate automatic prio and hence doesn't pass prio - * 0 to driver. However, the hardware TCAM index - * starts from 0. Hence, the -1 here. - */ - if (cls->common.prio <= (adap->tids.nftids + - adap->tids.nhpftids)) { - fidx = cls->common.prio - 1; - if (fidx < adap->tids.nhpftids) - fs->prio = 1; - } else { - fidx = cxgb4_get_free_ftid(dev, inet_family); - } + /* Get a free filter entry TID, where we can insert this new + * rule. Only insert rule if its prio doesn't conflict with + * existing rules. + */ + fidx = cxgb4_get_free_ftid(dev, inet_family, fs->hash, + cls->common.prio); + if (fidx < 0) { + NL_SET_ERR_MSG_MOD(extack, + "No free LETCAM index available"); + ret = -ENOMEM; + goto free_entry; + } - /* Only insert FLOWER rule if its priority doesn't - * conflict with existing rules in the LETCAM. - */ - if (fidx < 0 || - !cxgb4_filter_prio_in_range(dev, fidx, cls->common.prio)) { - NL_SET_ERR_MSG_MOD(extack, - "No free LETCAM index available"); - ret = -ENOMEM; - goto free_entry; - } + if (fidx < adap->tids.nhpftids) { + fs->prio = 1; + fs->hash = 0; } + /* If the rule can be inserted into HASH region, then ignore + * the index to normal FILTER region. + */ + if (fs->hash) + fidx = 0; + fs->tc_prio = cls->common.prio; fs->tc_cookie = cls->cookie; @@ -727,6 +779,9 @@ int cxgb4_tc_flower_replace(struct net_device *dev, if (ret) goto del_filter; + if (fs->hash) + cxgb4_tc_flower_hash_prio_add(adap, cls->common.prio); + return 0; del_filter: @@ -742,12 +797,17 @@ int cxgb4_tc_flower_destroy(struct net_device *dev, { struct adapter *adap = netdev2adap(dev); struct ch_tc_flower_entry *ch_flower; + u32 tc_prio; + bool hash; int ret; ch_flower = ch_flower_lookup(adap, cls->cookie); if (!ch_flower) return -ENOENT; + hash = ch_flower->fs.hash; + tc_prio = ch_flower->fs.tc_prio; + ret = cxgb4_del_filter(dev, ch_flower->filter_id, &ch_flower->fs); if (ret) goto err; @@ -760,6 +820,9 @@ int cxgb4_tc_flower_destroy(struct net_device *dev, } kfree_rcu(ch_flower, rcu); + if (hash) + cxgb4_tc_flower_hash_prio_del(adap, tc_prio); + err: return ret; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c index d80dee4d316d..8a5ae8bc9b7d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c @@ -198,22 +198,14 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev, struct ch_filter_specification *fs; int ret, fidx; - /* Note that TC uses prio 0 to indicate stack to generate - * automatic prio and hence doesn't pass prio 0 to driver. - * However, the hardware TCAM index starts from 0. Hence, the - * -1 here. 1 slot is enough to create a wildcard matchall - * VIID rule. + /* Get a free filter entry TID, where we can insert this new + * rule. Only insert rule if its prio doesn't conflict with + * existing rules. + * + * 1 slot is enough to create a wildcard matchall VIID rule. */ - if (cls->common.prio <= (adap->tids.nftids + adap->tids.nhpftids)) - fidx = cls->common.prio - 1; - else - fidx = cxgb4_get_free_ftid(dev, PF_INET); - - /* Only insert MATCHALL rule if its priority doesn't conflict - * with existing rules in the LETCAM. - */ - if (fidx < 0 || - !cxgb4_filter_prio_in_range(dev, fidx, cls->common.prio)) { + fidx = cxgb4_get_free_ftid(dev, PF_INET, false, cls->common.prio); + if (fidx < 0) { NL_SET_ERR_MSG_MOD(extack, "No free LETCAM index available"); return -ENOMEM; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 269b8d9e25e0..3f3c11e54d97 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -155,9 +155,10 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) struct ch_filter_specification fs; struct cxgb4_tc_u32_table *t; struct cxgb4_link *link; - unsigned int filter_id; u32 uhtid, link_uhtid; bool is_ipv6 = false; + u8 inet_family; + int filter_id; int ret; if (!can_tc_u32_offload(dev)) @@ -166,18 +167,15 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6)) return -EOPNOTSUPP; - /* Note that TC uses prio 0 to indicate stack to generate - * automatic prio and hence doesn't pass prio 0 to driver. - * However, the hardware TCAM index starts from 0. Hence, the - * -1 here. - */ - filter_id = TC_U32_NODE(cls->knode.handle) - 1; + inet_family = (protocol == htons(ETH_P_IPV6)) ? PF_INET6 : PF_INET; - /* Only insert U32 rule if its priority doesn't conflict with - * existing rules in the LETCAM. + /* Get a free filter entry TID, where we can insert this new + * rule. Only insert rule if its prio doesn't conflict with + * existing rules. */ - if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids || - !cxgb4_filter_prio_in_range(dev, filter_id, cls->common.prio)) { + filter_id = cxgb4_get_free_ftid(dev, inet_family, false, + TC_U32_NODE(cls->knode.handle)); + if (filter_id < 0) { NL_SET_ERR_MSG_MOD(extack, "No free LETCAM index available"); return -ENOMEM; @@ -358,23 +356,65 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) struct cxgb4_link *link = NULL; struct cxgb4_tc_u32_table *t; struct filter_entry *f; + bool found = false; u32 handle, uhtid; + u8 nslots; int ret; if (!can_tc_u32_offload(dev)) return -EOPNOTSUPP; /* Fetch the location to delete the filter. */ - filter_id = TC_U32_NODE(cls->knode.handle) - 1; - if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids) - return -ERANGE; + max_tids = adapter->tids.nhpftids + adapter->tids.nftids; + + spin_lock_bh(&adapter->tids.ftid_lock); + filter_id = 0; + while (filter_id < max_tids) { + if (filter_id < adapter->tids.nhpftids) { + i = filter_id; + f = &adapter->tids.hpftid_tab[i]; + if (f->valid && f->fs.tc_cookie == cls->knode.handle) { + found = true; + break; + } - if (filter_id < adapter->tids.nhpftids) - f = &adapter->tids.hpftid_tab[filter_id]; - else - f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids]; + i = find_next_bit(adapter->tids.hpftid_bmap, + adapter->tids.nhpftids, i + 1); + if (i >= adapter->tids.nhpftids) { + filter_id = adapter->tids.nhpftids; + continue; + } + + filter_id = i; + } else { + i = filter_id - adapter->tids.nhpftids; + f = &adapter->tids.ftid_tab[i]; + if (f->valid && f->fs.tc_cookie == cls->knode.handle) { + found = true; + break; + } + + i = find_next_bit(adapter->tids.ftid_bmap, + adapter->tids.nftids, i + 1); + if (i >= adapter->tids.nftids) + break; + + filter_id = i + adapter->tids.nhpftids; + } + + nslots = 0; + if (f->fs.type) { + nslots++; + if (CHELSIO_CHIP_VERSION(adapter->params.chip) < + CHELSIO_T6) + nslots += 2; + } + + filter_id += nslots; + } + spin_unlock_bh(&adapter->tids.ftid_lock); - if (cls->knode.handle != f->fs.tc_cookie) + if (!found) return -ERANGE; t = adapter->tc_u32; @@ -407,7 +447,6 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* If a link is being deleted, then delete all filters * associated with the link. */ - max_tids = adapter->tids.nftids; for (i = 0; i < t->size; i++) { link = &t->table[i]; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 03b9bdc812cc..be831317520a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -149,6 +149,8 @@ struct tid_info { atomic_t conns_in_use; /* lock for setting/clearing filter bitmap */ spinlock_t ftid_lock; + + unsigned int tc_hash_tids_max_prio; }; static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) @@ -263,7 +265,8 @@ struct filter_ctx { struct ch_filter_specification; -int cxgb4_get_free_ftid(struct net_device *dev, int family); +int cxgb4_get_free_ftid(struct net_device *dev, u8 family, bool hash_en, + u32 tc_prio); int __cxgb4_set_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs, struct filter_ctx *ctx); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 844fdcf55118..df97200c52ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -1379,8 +1379,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x9608, 0x9638, 0x9640, 0x96f4, 0x9800, 0x9808, - 0x9820, 0x983c, - 0x9850, 0x9864, + 0x9810, 0x9864, 0x9c00, 0x9c6c, 0x9c80, 0x9cec, 0x9d00, 0x9d6c, @@ -1389,7 +1388,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x9e80, 0x9eec, 0x9f00, 0x9f6c, 0x9f80, 0xa020, - 0xd004, 0xd004, + 0xd000, 0xd004, 0xd010, 0xd03c, 0xdfc0, 0xdfe0, 0xe000, 0x1106c, @@ -1430,10 +1429,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1a0b0, 0x1a0e4, 0x1a0ec, 0x1a0f8, 0x1a100, 0x1a108, - 0x1a114, 0x1a120, - 0x1a128, 0x1a130, - 0x1a138, 0x1a138, - 0x1a190, 0x1a1c4, + 0x1a114, 0x1a130, + 0x1a138, 0x1a1c4, 0x1a1fc, 0x1a1fc, 0x1e008, 0x1e00c, 0x1e040, 0x1e044, @@ -2162,8 +2159,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x9640, 0x9704, 0x9710, 0x971c, 0x9800, 0x9808, - 0x9820, 0x983c, - 0x9850, 0x9864, + 0x9810, 0x9864, 0x9c00, 0x9c6c, 0x9c80, 0x9cec, 0x9d00, 0x9d6c, @@ -2172,7 +2168,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x9e80, 0x9eec, 0x9f00, 0x9f6c, 0x9f80, 0xa020, - 0xd004, 0xd03c, + 0xd000, 0xd03c, 0xd100, 0xd118, 0xd200, 0xd214, 0xd220, 0xd234, @@ -2240,10 +2236,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1a0b0, 0x1a0e4, 0x1a0ec, 0x1a0f8, 0x1a100, 0x1a108, - 0x1a114, 0x1a120, - 0x1a128, 0x1a130, - 0x1a138, 0x1a138, - 0x1a190, 0x1a1c4, + 0x1a114, 0x1a130, + 0x1a138, 0x1a1c4, 0x1a1fc, 0x1a1fc, 0x1e008, 0x1e00c, 0x1e040, 0x1e044, diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 9d9f0545fbfe..d6ed1d943762 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1408,6 +1408,9 @@ static int be_set_priv_flags(struct net_device *netdev, u32 flags) } const struct ethtool_ops be_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_USE_ADAPTIVE | + ETHTOOL_COALESCE_USECS_LOW_HIGH, .get_drvinfo = be_get_drvinfo, .get_wol = be_get_wol, .set_wol = be_set_wol, diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index c0fe305b9d16..46039d80bb43 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -1,4 +1,5 @@ /* Copyright 2008 - 2016 Freescale Semiconductor Inc. + * Copyright 2020 NXP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -123,7 +124,22 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms"); #define FSL_QMAN_MAX_OAL 127 /* Default alignment for start of data in an Rx FD */ +#ifdef CONFIG_DPAA_ERRATUM_A050385 +/* aligning data start to 64 avoids DMA transaction splits, unless the buffer + * is crossing a 4k page boundary + */ +#define DPAA_FD_DATA_ALIGNMENT (fman_has_errata_a050385() ? 64 : 16) +/* aligning to 256 avoids DMA transaction splits caused by 4k page boundary + * crossings; also, all SG fragments except the last must have a size multiple + * of 256 to avoid DMA transaction splits + */ +#define DPAA_A050385_ALIGN 256 +#define DPAA_FD_RX_DATA_ALIGNMENT (fman_has_errata_a050385() ? \ + DPAA_A050385_ALIGN : 16) +#else #define DPAA_FD_DATA_ALIGNMENT 16 +#define DPAA_FD_RX_DATA_ALIGNMENT DPAA_FD_DATA_ALIGNMENT +#endif /* The DPAA requires 256 bytes reserved and mapped for the SGT */ #define DPAA_SGT_SIZE 256 @@ -158,8 +174,13 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms"); #define DPAA_PARSE_RESULTS_SIZE sizeof(struct fman_prs_result) #define DPAA_TIME_STAMP_SIZE 8 #define DPAA_HASH_RESULTS_SIZE 8 +#ifdef CONFIG_DPAA_ERRATUM_A050385 +#define DPAA_RX_PRIV_DATA_SIZE (DPAA_A050385_ALIGN - (DPAA_PARSE_RESULTS_SIZE\ + + DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE)) +#else #define DPAA_RX_PRIV_DATA_SIZE (u16)(DPAA_TX_PRIV_DATA_SIZE + \ dpaa_rx_extra_headroom) +#endif #define DPAA_ETH_PCD_RXQ_NUM 128 @@ -180,7 +201,12 @@ static struct dpaa_bp *dpaa_bp_array[BM_MAX_NUM_OF_POOLS]; #define DPAA_BP_RAW_SIZE 4096 +#ifdef CONFIG_DPAA_ERRATUM_A050385 +#define dpaa_bp_size(raw_size) (SKB_WITH_OVERHEAD(raw_size) & \ + ~(DPAA_A050385_ALIGN - 1)) +#else #define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD(raw_size) +#endif static int dpaa_max_frm; @@ -1204,7 +1230,7 @@ static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp *bp, buf_prefix_content.pass_prs_result = true; buf_prefix_content.pass_hash_result = true; buf_prefix_content.pass_time_stamp = true; - buf_prefix_content.data_align = DPAA_FD_DATA_ALIGNMENT; + buf_prefix_content.data_align = DPAA_FD_RX_DATA_ALIGNMENT; rx_p = ¶ms.specific_params.rx_params; rx_p->err_fqid = errq->fqid; @@ -1674,6 +1700,8 @@ static u8 rx_csum_offload(const struct dpaa_priv *priv, const struct qm_fd *fd) return CHECKSUM_NONE; } +#define PTR_IS_ALIGNED(x, a) (IS_ALIGNED((unsigned long)(x), (a))) + /* Build a linear skb around the received buffer. * We are guaranteed there is enough room at the end of the data buffer to * accommodate the shared info area of the skb. @@ -1745,8 +1773,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, sg_addr = qm_sg_addr(&sgt[i]); sg_vaddr = phys_to_virt(sg_addr); - WARN_ON(!IS_ALIGNED((unsigned long)sg_vaddr, - SMP_CACHE_BYTES)); + WARN_ON(!PTR_IS_ALIGNED(sg_vaddr, SMP_CACHE_BYTES)); dma_unmap_page(priv->rx_dma_dev, sg_addr, DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE); @@ -2034,6 +2061,75 @@ static inline int dpaa_xmit(struct dpaa_priv *priv, return 0; } +#ifdef CONFIG_DPAA_ERRATUM_A050385 +int dpaa_a050385_wa(struct net_device *net_dev, struct sk_buff **s) +{ + struct dpaa_priv *priv = netdev_priv(net_dev); + struct sk_buff *new_skb, *skb = *s; + unsigned char *start, i; + + /* check linear buffer alignment */ + if (!PTR_IS_ALIGNED(skb->data, DPAA_A050385_ALIGN)) + goto workaround; + + /* linear buffers just need to have an aligned start */ + if (!skb_is_nonlinear(skb)) + return 0; + + /* linear data size for nonlinear skbs needs to be aligned */ + if (!IS_ALIGNED(skb_headlen(skb), DPAA_A050385_ALIGN)) + goto workaround; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + /* all fragments need to have aligned start addresses */ + if (!IS_ALIGNED(skb_frag_off(frag), DPAA_A050385_ALIGN)) + goto workaround; + + /* all but last fragment need to have aligned sizes */ + if (!IS_ALIGNED(skb_frag_size(frag), DPAA_A050385_ALIGN) && + (i < skb_shinfo(skb)->nr_frags - 1)) + goto workaround; + } + + return 0; + +workaround: + /* copy all the skb content into a new linear buffer */ + new_skb = netdev_alloc_skb(net_dev, skb->len + DPAA_A050385_ALIGN - 1 + + priv->tx_headroom); + if (!new_skb) + return -ENOMEM; + + /* NET_SKB_PAD bytes already reserved, adding up to tx_headroom */ + skb_reserve(new_skb, priv->tx_headroom - NET_SKB_PAD); + + /* Workaround for DPAA_A050385 requires data start to be aligned */ + start = PTR_ALIGN(new_skb->data, DPAA_A050385_ALIGN); + if (start - new_skb->data != 0) + skb_reserve(new_skb, start - new_skb->data); + + skb_put(new_skb, skb->len); + skb_copy_bits(skb, 0, new_skb->data, skb->len); + skb_copy_header(new_skb, skb); + new_skb->dev = skb->dev; + + /* We move the headroom when we align it so we have to reset the + * network and transport header offsets relative to the new data + * pointer. The checksum offload relies on these offsets. + */ + skb_set_network_header(new_skb, skb_network_offset(skb)); + skb_set_transport_header(new_skb, skb_transport_offset(skb)); + + /* TODO: does timestamping need the result in the old skb? */ + dev_kfree_skb(skb); + *s = new_skb; + + return 0; +} +#endif + static netdev_tx_t dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) { @@ -2080,6 +2176,14 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) nonlinear = skb_is_nonlinear(skb); } +#ifdef CONFIG_DPAA_ERRATUM_A050385 + if (unlikely(fman_has_errata_a050385())) { + if (dpaa_a050385_wa(net_dev, &skb)) + goto enomem; + nonlinear = skb_is_nonlinear(skb); + } +#endif + if (nonlinear) { /* Just create a S/G fd based on the skb */ err = skb_to_sg_fd(priv, skb, &fd); @@ -2753,9 +2857,7 @@ static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl) headroom = (u16)(bl->priv_data_size + DPAA_PARSE_RESULTS_SIZE + DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE); - return DPAA_FD_DATA_ALIGNMENT ? ALIGN(headroom, - DPAA_FD_DATA_ALIGNMENT) : - headroom; + return ALIGN(headroom, DPAA_FD_DATA_ALIGNMENT); } static int dpaa_eth_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 6aa1fa22cd04..9db2a02fb531 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -525,7 +525,6 @@ static int dpaa_get_coalesce(struct net_device *dev, c->rx_coalesce_usecs = period; c->rx_max_coalesced_frames = thresh; - c->use_adaptive_rx_coalesce = false; return 0; } @@ -540,9 +539,6 @@ static int dpaa_set_coalesce(struct net_device *dev, u8 thresh, prev_thresh; int cpu, res; - if (c->use_adaptive_rx_coalesce) - return -EINVAL; - period = c->rx_coalesce_usecs; thresh = c->rx_max_coalesced_frames; @@ -582,6 +578,8 @@ revert_values: } const struct ethtool_ops dpaa_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | + ETHTOOL_COALESCE_RX_MAX_FRAMES, .get_drvinfo = dpaa_get_drvinfo, .get_msglevel = dpaa_get_msglevel, .set_msglevel = dpaa_set_msglevel, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 4e4a49179f0b..85e2b741df41 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -8,7 +8,6 @@ #include "enetc_pf.h" #define ENETC_DRV_NAME_STR "ENETC PF driver" -static const char enetc_drv_name[] = ENETC_DRV_NAME_STR; static void enetc_pf_get_primary_mac_addr(struct enetc_hw *hw, int si, u8 *addr) { diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 28a786b2f3e7..f14576212a0e 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -5,7 +5,6 @@ #include "enetc.h" #define ENETC_DRV_NAME_STR "ENETC VF driver" -static const char enetc_drv_name[] = ENETC_DRV_NAME_STR; /* Messaging */ static void enetc_msg_vsi_write_msg(struct enetc_hw *hw, diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index af7653e341f2..c1c267b61647 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2528,15 +2528,15 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec) return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->rx_coalesce_usecs); if (cycle > 0xFFFF) { dev_err(dev, "Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->tx_coalesce_usecs); if (cycle > 0xFFFF) { - dev_err(dev, "Rx coalesced usec exceed hardware limitation\n"); + dev_err(dev, "Tx coalesced usec exceed hardware limitation\n"); return -EINVAL; } @@ -2641,6 +2641,8 @@ fec_enet_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) } static const struct ethtool_ops fec_enet_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = fec_enet_get_drvinfo, .get_regs_len = fec_enet_get_regs_len, .get_regs = fec_enet_get_regs, diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig index 0139cb9042ec..34150182cc35 100644 --- a/drivers/net/ethernet/freescale/fman/Kconfig +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -8,3 +8,31 @@ config FSL_FMAN help Freescale Data-Path Acceleration Architecture Frame Manager (FMan) support + +config DPAA_ERRATUM_A050385 + bool + depends on ARM64 && FSL_DPAA + default y + help + DPAA FMan erratum A050385 software workaround implementation: + align buffers, data start, SG fragment length to avoid FMan DMA + splits. + FMAN DMA read or writes under heavy traffic load may cause FMAN + internal resource leak thus stopping further packet processing. + The FMAN internal queue can overflow when FMAN splits single + read or write transactions into multiple smaller transactions + such that more than 17 AXI transactions are in flight from FMAN + to interconnect. When the FMAN internal queue overflows, it can + stall further packet processing. The issue can occur with any + one of the following three conditions: + 1. FMAN AXI transaction crosses 4K address boundary (Errata + A010022) + 2. FMAN DMA address for an AXI transaction is not 16 byte + aligned, i.e. the last 4 bits of an address are non-zero + 3. Scatter Gather (SG) frames have more than one SG buffer in + the SG list and any one of the buffers, except the last + buffer in the SG list has data size that is not a multiple + of 16 bytes, i.e., other than 16, 32, 48, 64, etc. + With any one of the above three conditions present, there is + likelihood of stalled FMAN packet processing, especially under + stress with multiple ports injecting line-rate traffic. diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 934111def0be..f151d6e111dd 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -1,5 +1,6 @@ /* * Copyright 2008-2015 Freescale Semiconductor Inc. + * Copyright 2020 NXP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -566,6 +567,10 @@ struct fman_cfg { u32 qmi_def_tnums_thresh; }; +#ifdef CONFIG_DPAA_ERRATUM_A050385 +static bool fman_has_err_a050385; +#endif + static irqreturn_t fman_exceptions(struct fman *fman, enum fman_exceptions exception) { @@ -2518,6 +2523,14 @@ struct fman *fman_bind(struct device *fm_dev) } EXPORT_SYMBOL(fman_bind); +#ifdef CONFIG_DPAA_ERRATUM_A050385 +bool fman_has_errata_a050385(void) +{ + return fman_has_err_a050385; +} +EXPORT_SYMBOL(fman_has_errata_a050385); +#endif + static irqreturn_t fman_err_irq(int irq, void *handle) { struct fman *fman = (struct fman *)handle; @@ -2845,6 +2858,11 @@ static struct fman *read_dts_node(struct platform_device *of_dev) goto fman_free; } +#ifdef CONFIG_DPAA_ERRATUM_A050385 + fman_has_err_a050385 = + of_property_read_bool(fm_node, "fsl,erratum-a050385"); +#endif + return fman; fman_node_put: diff --git a/drivers/net/ethernet/freescale/fman/fman.h b/drivers/net/ethernet/freescale/fman/fman.h index 935c317fa696..f2ede1360f03 100644 --- a/drivers/net/ethernet/freescale/fman/fman.h +++ b/drivers/net/ethernet/freescale/fman/fman.h @@ -1,5 +1,6 @@ /* * Copyright 2008-2015 Freescale Semiconductor Inc. + * Copyright 2020 NXP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -398,6 +399,10 @@ u16 fman_get_max_frm(void); int fman_get_rx_extra_headroom(void); +#ifdef CONFIG_DPAA_ERRATUM_A050385 +bool fman_has_errata_a050385(void); +#endif + struct fman *fman_bind(struct device *dev); #endif /* __FM_H */ diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index f82e0be4d309..cc7d4f93da54 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1474,6 +1474,8 @@ static int gfar_get_ts_info(struct net_device *dev, } const struct ethtool_ops gfar_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = gfar_gdrvinfo, .get_regs_len = gfar_reglen, .get_regs = gfar_get_regs, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 717fccc2efba..49624acf2473 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1264,6 +1264,11 @@ static int hns_get_rxnfc(struct net_device *netdev, } static const struct ethtool_ops hns_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE | + ETHTOOL_COALESCE_USECS_LOW_HIGH | + ETHTOOL_COALESCE_MAX_FRAMES_LOW_HIGH, .get_drvinfo = hns_nic_get_drvinfo, .get_link = hns_nic_get_link, .get_ringparam = hns_get_ringparam, diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 1b0313900f98..948e67ef30fd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -7,8 +7,6 @@ #include <linux/mutex.h> #include <linux/types.h> -#define HCLGE_MBX_VF_MSG_DATA_NUM 16 - enum HCLGE_MBX_OPCODE { HCLGE_MBX_RESET = 0x01, /* (VF -> PF) assert reset */ HCLGE_MBX_ASSERTING_RESET, /* (PF -> VF) PF is asserting reset*/ @@ -46,6 +44,7 @@ enum HCLGE_MBX_OPCODE { HCLGE_MBX_PUSH_VLAN_INFO, /* (PF -> VF) push port base vlan */ HCLGE_MBX_GET_MEDIA_TYPE, /* (VF -> PF) get media type */ HCLGE_MBX_PUSH_PROMISC_INFO, /* (PF -> VF) push vf promisc info */ + HCLGE_MBX_VF_UNINIT, /* (VF -> PF) vf is unintializing */ HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf flr status */ HCLGE_MBX_PUSH_LINK_STATUS, /* (M7 -> PF) get port link status */ @@ -71,10 +70,15 @@ enum hclge_mbx_vlan_cfg_subcode { HCLGE_MBX_GET_PORT_BASE_VLAN_STATE, /* get port based vlan state */ }; -#define HCLGE_MBX_MAX_MSG_SIZE 16 +#define HCLGE_MBX_MAX_MSG_SIZE 14 #define HCLGE_MBX_MAX_RESP_DATA_SIZE 8U -#define HCLGE_MBX_RING_MAP_BASIC_MSG_NUM 3 -#define HCLGE_MBX_RING_NODE_VARIABLE_NUM 3 +#define HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM 4 + +struct hclge_ring_chain_param { + u8 ring_type; + u8 tqp_index; + u8 int_gl_index; +}; struct hclgevf_mbx_resp_status { struct mutex mbx_mutex; /* protects against contending sync cmd resp */ @@ -84,6 +88,41 @@ struct hclgevf_mbx_resp_status { u8 additional_info[HCLGE_MBX_MAX_RESP_DATA_SIZE]; }; +struct hclge_respond_to_vf_msg { + int status; + u8 data[HCLGE_MBX_MAX_RESP_DATA_SIZE]; + u16 len; +}; + +struct hclge_vf_to_pf_msg { + u8 code; + union { + struct { + u8 subcode; + u8 data[HCLGE_MBX_MAX_MSG_SIZE]; + }; + struct { + u8 en_bc; + u8 en_uc; + u8 en_mc; + }; + struct { + u8 vector_id; + u8 ring_num; + struct hclge_ring_chain_param + param[HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM]; + }; + }; +}; + +struct hclge_pf_to_vf_msg { + u16 code; + u16 vf_mbx_msg_code; + u16 vf_mbx_msg_subcode; + u16 resp_status; + u8 resp_data[HCLGE_MBX_MAX_RESP_DATA_SIZE]; +}; + struct hclge_mbx_vf_to_pf_cmd { u8 rsv; u8 mbx_src_vfid; /* Auto filled by IMP */ @@ -91,17 +130,17 @@ struct hclge_mbx_vf_to_pf_cmd { u8 rsv1[1]; u8 msg_len; u8 rsv2[3]; - u8 msg[HCLGE_MBX_MAX_MSG_SIZE]; + struct hclge_vf_to_pf_msg msg; }; -#define HCLGE_MBX_NEED_RESP_BIT BIT(0) +#define HCLGE_MBX_NEED_RESP_B 0 struct hclge_mbx_pf_to_vf_cmd { u8 dest_vfid; u8 rsv[3]; u8 msg_len; u8 rsv1[3]; - u16 msg[8]; + struct hclge_pf_to_vf_msg msg; }; struct hclge_vf_rst_cmd { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index c54f262ac333..8e04d3909321 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1711,7 +1711,7 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data) netif_dbg(h, drv, netdev, "setup tc: num_tc=%u\n", tc); return (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ? - kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP; + kinfo->dcb_ops->setup_tc(h, tc ? tc : 1, prio_tc) : -EOPNOTSUPP; } static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 3f59a1924390..28b81f24afa1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -1390,7 +1390,13 @@ static int hns3_set_fecparam(struct net_device *netdev, return ops->set_fec(handle, fec_mode); } +#define HNS3_ETHTOOL_COALESCE (ETHTOOL_COALESCE_USECS | \ + ETHTOOL_COALESCE_USE_ADAPTIVE | \ + ETHTOOL_COALESCE_RX_USECS_HIGH | \ + ETHTOOL_COALESCE_TX_USECS_HIGH) + static const struct ethtool_ops hns3vf_ethtool_ops = { + .supported_coalesce_params = HNS3_ETHTOOL_COALESCE, .get_drvinfo = hns3_get_drvinfo, .get_ringparam = hns3_get_ringparam, .set_ringparam = hns3_set_ringparam, @@ -1416,6 +1422,7 @@ static const struct ethtool_ops hns3vf_ethtool_ops = { }; static const struct ethtool_ops hns3_ethtool_ops = { + .supported_coalesce_params = HNS3_ETHTOOL_COALESCE, .self_test = hns3_self_test, .get_drvinfo = hns3_get_drvinfo, .get_link = hns3_get_link, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index cdf7f4bdef86..75d0d0fcd69b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2449,10 +2449,12 @@ static int hclge_cfg_mac_speed_dup_hw(struct hclge_dev *hdev, int speed, int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex) { + struct hclge_mac *mac = &hdev->hw.mac; int ret; duplex = hclge_check_speed_dup(duplex, speed); - if (hdev->hw.mac.speed == speed && hdev->hw.mac.duplex == duplex) + if (!mac->support_autoneg && mac->speed == speed && + mac->duplex == duplex) return 0; ret = hclge_cfg_mac_speed_dup_hw(hdev, speed, duplex); @@ -7750,16 +7752,27 @@ static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type, struct hclge_desc desc; int ret; - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, false); - + /* read current vlan filter parameter */ + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, true); req = (struct hclge_vlan_filter_ctrl_cmd *)desc.data; req->vlan_type = vlan_type; - req->vlan_fe = filter_en ? fe_type : 0; req->vf_id = vf_id; ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get vlan filter config, ret = %d.\n", ret); + return ret; + } + + /* modify and write new config parameter */ + hclge_cmd_reuse_desc(&desc, false); + req->vlan_fe = filter_en ? + (req->vlan_fe | fe_type) : (req->vlan_fe & ~fe_type); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) - dev_err(&hdev->pdev->dev, "set vlan filter fail, ret =%d.\n", + dev_err(&hdev->pdev->dev, "failed to set vlan filter, ret = %d.\n", ret); return ret; @@ -8277,6 +8290,7 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) kfree(vlan); } } + clear_bit(vport->vport_id, hdev->vf_vlan_full); } void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) @@ -8493,6 +8507,28 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, } } +static void hclge_clear_vf_vlan(struct hclge_dev *hdev) +{ + struct hclge_vlan_info *vlan_info; + struct hclge_vport *vport; + int ret; + int vf; + + /* clear port base vlan for all vf */ + for (vf = HCLGE_VF_VPORT_START_NUM; vf < hdev->num_alloc_vport; vf++) { + vport = &hdev->vport[vf]; + vlan_info = &vport->port_base_vlan_cfg.vlan_info; + + ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), + vport->vport_id, + vlan_info->vlan_tag, true); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to clear vf vlan for vf%d, ret = %d\n", + vf - HCLGE_VF_VPORT_START_NUM, ret); + } +} + int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, u16 vlan_id, bool is_kill) { @@ -9902,6 +9938,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) struct hclge_mac *mac = &hdev->hw.mac; hclge_reset_vf_rate(hdev); + hclge_clear_vf_vlan(hdev); hclge_misc_affinity_teardown(hdev); hclge_state_uninit(hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index a3c0822191a9..7f24fcb4f96a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -5,6 +5,11 @@ #include "hclge_mbx.h" #include "hnae3.h" +static u16 hclge_errno_to_resp(int errno) +{ + return abs(errno); +} + /* hclge_gen_resp_to_vf: used to generate a synchronous response to VF when PF * receives a mailbox message from VF. * @vport: pointer to struct hclge_vport @@ -14,25 +19,25 @@ */ static int hclge_gen_resp_to_vf(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *vf_to_pf_req, - int resp_status, - u8 *resp_data, u16 resp_data_len) + struct hclge_respond_to_vf_msg *resp_msg) { struct hclge_mbx_pf_to_vf_cmd *resp_pf_to_vf; struct hclge_dev *hdev = vport->back; enum hclge_cmd_status status; struct hclge_desc desc; + u16 resp; resp_pf_to_vf = (struct hclge_mbx_pf_to_vf_cmd *)desc.data; - if (resp_data_len > HCLGE_MBX_MAX_RESP_DATA_SIZE) { + if (resp_msg->len > HCLGE_MBX_MAX_RESP_DATA_SIZE) { dev_err(&hdev->pdev->dev, "PF fail to gen resp to VF len %u exceeds max len %u\n", - resp_data_len, + resp_msg->len, HCLGE_MBX_MAX_RESP_DATA_SIZE); - /* If resp_data_len is too long, set the value to max length + /* If resp_msg->len is too long, set the value to max length * and return the msg to VF */ - resp_data_len = HCLGE_MBX_MAX_RESP_DATA_SIZE; + resp_msg->len = HCLGE_MBX_MAX_RESP_DATA_SIZE; } hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false); @@ -40,18 +45,29 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport, resp_pf_to_vf->dest_vfid = vf_to_pf_req->mbx_src_vfid; resp_pf_to_vf->msg_len = vf_to_pf_req->msg_len; - resp_pf_to_vf->msg[0] = HCLGE_MBX_PF_VF_RESP; - resp_pf_to_vf->msg[1] = vf_to_pf_req->msg[0]; - resp_pf_to_vf->msg[2] = vf_to_pf_req->msg[1]; - resp_pf_to_vf->msg[3] = (resp_status == 0) ? 0 : 1; + resp_pf_to_vf->msg.code = HCLGE_MBX_PF_VF_RESP; + resp_pf_to_vf->msg.vf_mbx_msg_code = vf_to_pf_req->msg.code; + resp_pf_to_vf->msg.vf_mbx_msg_subcode = vf_to_pf_req->msg.subcode; + resp = hclge_errno_to_resp(resp_msg->status); + if (resp < SHRT_MAX) { + resp_pf_to_vf->msg.resp_status = resp; + } else { + dev_warn(&hdev->pdev->dev, + "failed to send response to VF, response status %d is out-of-bound\n", + resp); + resp_pf_to_vf->msg.resp_status = EIO; + } - if (resp_data && resp_data_len > 0) - memcpy(&resp_pf_to_vf->msg[4], resp_data, resp_data_len); + if (resp_msg->len > 0) + memcpy(resp_pf_to_vf->msg.resp_data, resp_msg->data, + resp_msg->len); status = hclge_cmd_send(&hdev->hw, &desc, 1); if (status) dev_err(&hdev->pdev->dev, - "PF failed(=%d) to send response to VF\n", status); + "failed to send response to VF, status: %d, vfid: %u, code: %u, subcode: %u.\n", + status, vf_to_pf_req->mbx_src_vfid, + vf_to_pf_req->msg.code, vf_to_pf_req->msg.subcode); return status; } @@ -70,15 +86,15 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, resp_pf_to_vf->dest_vfid = dest_vfid; resp_pf_to_vf->msg_len = msg_len; - resp_pf_to_vf->msg[0] = mbx_opcode; + resp_pf_to_vf->msg.code = mbx_opcode; - memcpy(&resp_pf_to_vf->msg[1], msg, msg_len); + memcpy(&resp_pf_to_vf->msg.vf_mbx_msg_code, msg, msg_len); status = hclge_cmd_send(&hdev->hw, &desc, 1); if (status) dev_err(&hdev->pdev->dev, - "PF failed(=%d) to send mailbox message to VF\n", - status); + "failed to send mailbox to VF, status: %d, vfid: %u, opcode: %u\n", + status, dest_vfid, mbx_opcode); return status; } @@ -138,21 +154,20 @@ static int hclge_get_ring_chain_from_mbx( { struct hnae3_ring_chain_node *cur_chain, *new_chain; int ring_num; - int i; + int i = 0; - ring_num = req->msg[2]; + ring_num = req->msg.ring_num; - if (ring_num > ((HCLGE_MBX_VF_MSG_DATA_NUM - - HCLGE_MBX_RING_MAP_BASIC_MSG_NUM) / - HCLGE_MBX_RING_NODE_VARIABLE_NUM)) + if (ring_num > HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM) return -ENOMEM; - hnae3_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, req->msg[3]); + hnae3_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, + req->msg.param[i].ring_type); ring_chain->tqp_index = - hclge_get_queue_id(vport->nic.kinfo.tqp[req->msg[4]]); + hclge_get_queue_id(vport->nic.kinfo.tqp + [req->msg.param[i].tqp_index]); hnae3_set_field(ring_chain->int_gl_idx, HNAE3_RING_GL_IDX_M, - HNAE3_RING_GL_IDX_S, - req->msg[5]); + HNAE3_RING_GL_IDX_S, req->msg.param[i].int_gl_index); cur_chain = ring_chain; @@ -162,18 +177,15 @@ static int hclge_get_ring_chain_from_mbx( goto err; hnae3_set_bit(new_chain->flag, HNAE3_RING_TYPE_B, - req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i + - HCLGE_MBX_RING_MAP_BASIC_MSG_NUM]); + req->msg.param[i].ring_type); new_chain->tqp_index = hclge_get_queue_id(vport->nic.kinfo.tqp - [req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i + - HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 1]]); + [req->msg.param[i].tqp_index]); hnae3_set_field(new_chain->int_gl_idx, HNAE3_RING_GL_IDX_M, HNAE3_RING_GL_IDX_S, - req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i + - HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 2]); + req->msg.param[i].int_gl_index); cur_chain->next = new_chain; cur_chain = new_chain; @@ -189,7 +201,7 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en, struct hclge_mbx_vf_to_pf_cmd *req) { struct hnae3_ring_chain_node ring_chain; - int vector_id = req->msg[1]; + int vector_id = req->msg.vector_id; int ret; memset(&ring_chain, 0, sizeof(ring_chain)); @@ -207,13 +219,9 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en, static int hclge_set_vf_promisc_mode(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *req) { -#define HCLGE_MBX_BC_INDEX 1 -#define HCLGE_MBX_UC_INDEX 2 -#define HCLGE_MBX_MC_INDEX 3 - - bool en_bc = req->msg[HCLGE_MBX_BC_INDEX] ? true : false; - bool en_uc = req->msg[HCLGE_MBX_UC_INDEX] ? true : false; - bool en_mc = req->msg[HCLGE_MBX_MC_INDEX] ? true : false; + bool en_bc = req->msg.en_bc ? true : false; + bool en_uc = req->msg.en_uc ? true : false; + bool en_mc = req->msg.en_mc ? true : false; int ret; if (!vport->vf_info.trusted) { @@ -222,8 +230,6 @@ static int hclge_set_vf_promisc_mode(struct hclge_vport *vport, } ret = hclge_set_vport_promisc_mode(vport, en_uc, en_mc, en_bc); - if (req->mbx_need_resp) - hclge_gen_resp_to_vf(vport, req, ret, NULL, 0); vport->vf_info.promisc_enable = (en_uc || en_mc) ? 1 : 0; @@ -244,26 +250,25 @@ void hclge_inform_vf_promisc_info(struct hclge_vport *vport) static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *mbx_req) { - const u8 *mac_addr = (const u8 *)(&mbx_req->msg[2]); +#define HCLGE_MBX_VF_OLD_MAC_ADDR_OFFSET 6 + + const u8 *mac_addr = (const u8 *)(mbx_req->msg.data); struct hclge_dev *hdev = vport->back; int status; - if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_MODIFY) { - const u8 *old_addr = (const u8 *)(&mbx_req->msg[8]); + if (mbx_req->msg.subcode == HCLGE_MBX_MAC_VLAN_UC_MODIFY) { + const u8 *old_addr = (const u8 *) + (&mbx_req->msg.data[HCLGE_MBX_VF_OLD_MAC_ADDR_OFFSET]); /* If VF MAC has been configured by the host then it * cannot be overridden by the MAC specified by the VM. */ if (!is_zero_ether_addr(vport->vf_info.mac) && - !ether_addr_equal(mac_addr, vport->vf_info.mac)) { - status = -EPERM; - goto out; - } + !ether_addr_equal(mac_addr, vport->vf_info.mac)) + return -EPERM; - if (!is_valid_ether_addr(mac_addr)) { - status = -EINVAL; - goto out; - } + if (!is_valid_ether_addr(mac_addr)) + return -EINVAL; hclge_rm_uc_addr_common(vport, old_addr); status = hclge_add_uc_addr_common(vport, mac_addr); @@ -275,12 +280,12 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport, hclge_add_vport_mac_table(vport, mac_addr, HCLGE_MAC_ADDR_UC); } - } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_ADD) { + } else if (mbx_req->msg.subcode == HCLGE_MBX_MAC_VLAN_UC_ADD) { status = hclge_add_uc_addr_common(vport, mac_addr); if (!status) hclge_add_vport_mac_table(vport, mac_addr, HCLGE_MAC_ADDR_UC); - } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_REMOVE) { + } else if (mbx_req->msg.subcode == HCLGE_MBX_MAC_VLAN_UC_REMOVE) { status = hclge_rm_uc_addr_common(vport, mac_addr); if (!status) hclge_rm_vport_mac_table(vport, mac_addr, @@ -288,33 +293,26 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport, } else { dev_err(&hdev->pdev->dev, "failed to set unicast mac addr, unknown subcode %u\n", - mbx_req->msg[1]); + mbx_req->msg.subcode); return -EIO; } -out: - if (mbx_req->mbx_need_resp & HCLGE_MBX_NEED_RESP_BIT) - hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0); - - return 0; + return status; } static int hclge_set_vf_mc_mac_addr(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req, - bool gen_resp) + struct hclge_mbx_vf_to_pf_cmd *mbx_req) { - const u8 *mac_addr = (const u8 *)(&mbx_req->msg[2]); + const u8 *mac_addr = (const u8 *)(mbx_req->msg.data); struct hclge_dev *hdev = vport->back; - u8 resp_len = 0; - u8 resp_data; int status; - if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MC_ADD) { + if (mbx_req->msg.subcode == HCLGE_MBX_MAC_VLAN_MC_ADD) { status = hclge_add_mc_addr_common(vport, mac_addr); if (!status) hclge_add_vport_mac_table(vport, mac_addr, HCLGE_MAC_ADDR_MC); - } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MC_REMOVE) { + } else if (mbx_req->msg.subcode == HCLGE_MBX_MAC_VLAN_MC_REMOVE) { status = hclge_rm_mc_addr_common(vport, mac_addr); if (!status) hclge_rm_vport_mac_table(vport, mac_addr, @@ -322,15 +320,11 @@ static int hclge_set_vf_mc_mac_addr(struct hclge_vport *vport, } else { dev_err(&hdev->pdev->dev, "failed to set mcast mac addr, unknown subcode %u\n", - mbx_req->msg[1]); + mbx_req->msg.subcode); return -EIO; } - if (gen_resp) - hclge_gen_resp_to_vf(vport, mbx_req, status, - &resp_data, resp_len); - - return 0; + return status; } int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid, @@ -351,12 +345,16 @@ int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid, } static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req) + struct hclge_mbx_vf_to_pf_cmd *mbx_req, + struct hclge_respond_to_vf_msg *resp_msg) { +#define HCLGE_MBX_VLAN_STATE_OFFSET 0 +#define HCLGE_MBX_VLAN_INFO_OFFSET 2 + struct hclge_vf_vlan_cfg *msg_cmd; int status = 0; - msg_cmd = (struct hclge_vf_vlan_cfg *)mbx_req->msg; + msg_cmd = (struct hclge_vf_vlan_cfg *)&mbx_req->msg; if (msg_cmd->subcode == HCLGE_MBX_VLAN_FILTER) { struct hnae3_handle *handle = &vport->nic; u16 vlan, proto; @@ -367,38 +365,32 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport, proto = msg_cmd->proto; status = hclge_set_vlan_filter(handle, cpu_to_be16(proto), vlan, is_kill); - if (mbx_req->mbx_need_resp) - return hclge_gen_resp_to_vf(vport, mbx_req, status, - NULL, 0); } else if (msg_cmd->subcode == HCLGE_MBX_VLAN_RX_OFF_CFG) { struct hnae3_handle *handle = &vport->nic; bool en = msg_cmd->is_kill ? true : false; status = hclge_en_hw_strip_rxvtag(handle, en); - } else if (mbx_req->msg[1] == HCLGE_MBX_PORT_BASE_VLAN_CFG) { + } else if (msg_cmd->subcode == HCLGE_MBX_PORT_BASE_VLAN_CFG) { struct hclge_vlan_info *vlan_info; u16 *state; - state = (u16 *)&mbx_req->msg[2]; - vlan_info = (struct hclge_vlan_info *)&mbx_req->msg[4]; + state = (u16 *)&mbx_req->msg.data[HCLGE_MBX_VLAN_STATE_OFFSET]; + vlan_info = (struct hclge_vlan_info *) + &mbx_req->msg.data[HCLGE_MBX_VLAN_INFO_OFFSET]; status = hclge_update_port_base_vlan_cfg(vport, *state, vlan_info); - } else if (mbx_req->msg[1] == HCLGE_MBX_GET_PORT_BASE_VLAN_STATE) { - u8 state; - - state = vport->port_base_vlan_cfg.state; - status = hclge_gen_resp_to_vf(vport, mbx_req, 0, &state, - sizeof(u8)); + } else if (msg_cmd->subcode == HCLGE_MBX_GET_PORT_BASE_VLAN_STATE) { + resp_msg->data[0] = vport->port_base_vlan_cfg.state; + resp_msg->len = sizeof(u8); } return status; } static int hclge_set_vf_alive(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req, - bool gen_resp) + struct hclge_mbx_vf_to_pf_cmd *mbx_req) { - bool alive = !!mbx_req->msg[2]; + bool alive = !!mbx_req->msg.data[0]; int ret = 0; if (alive) @@ -409,73 +401,76 @@ static int hclge_set_vf_alive(struct hclge_vport *vport, return ret; } -static int hclge_get_vf_tcinfo(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req, - bool gen_resp) +static void hclge_get_vf_tcinfo(struct hclge_vport *vport, + struct hclge_respond_to_vf_msg *resp_msg) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; - u8 vf_tc_map = 0; unsigned int i; - int ret; for (i = 0; i < kinfo->num_tc; i++) - vf_tc_map |= BIT(i); - - ret = hclge_gen_resp_to_vf(vport, mbx_req, 0, &vf_tc_map, - sizeof(vf_tc_map)); + resp_msg->data[0] |= BIT(i); - return ret; + resp_msg->len = sizeof(u8); } -static int hclge_get_vf_queue_info(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req, - bool gen_resp) +static void hclge_get_vf_queue_info(struct hclge_vport *vport, + struct hclge_respond_to_vf_msg *resp_msg) { #define HCLGE_TQPS_RSS_INFO_LEN 6 - u8 resp_data[HCLGE_TQPS_RSS_INFO_LEN]; +#define HCLGE_TQPS_ALLOC_OFFSET 0 +#define HCLGE_TQPS_RSS_SIZE_OFFSET 2 +#define HCLGE_TQPS_RX_BUFFER_LEN_OFFSET 4 + struct hclge_dev *hdev = vport->back; /* get the queue related info */ - memcpy(&resp_data[0], &vport->alloc_tqps, sizeof(u16)); - memcpy(&resp_data[2], &vport->nic.kinfo.rss_size, sizeof(u16)); - memcpy(&resp_data[4], &hdev->rx_buf_len, sizeof(u16)); - - return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data, - HCLGE_TQPS_RSS_INFO_LEN); + memcpy(&resp_msg->data[HCLGE_TQPS_ALLOC_OFFSET], + &vport->alloc_tqps, sizeof(u16)); + memcpy(&resp_msg->data[HCLGE_TQPS_RSS_SIZE_OFFSET], + &vport->nic.kinfo.rss_size, sizeof(u16)); + memcpy(&resp_msg->data[HCLGE_TQPS_RX_BUFFER_LEN_OFFSET], + &hdev->rx_buf_len, sizeof(u16)); + resp_msg->len = HCLGE_TQPS_RSS_INFO_LEN; } -static int hclge_get_vf_mac_addr(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req) +static void hclge_get_vf_mac_addr(struct hclge_vport *vport, + struct hclge_respond_to_vf_msg *resp_msg) { - return hclge_gen_resp_to_vf(vport, mbx_req, 0, vport->vf_info.mac, - ETH_ALEN); + ether_addr_copy(resp_msg->data, vport->vf_info.mac); + resp_msg->len = ETH_ALEN; } -static int hclge_get_vf_queue_depth(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req, - bool gen_resp) +static void hclge_get_vf_queue_depth(struct hclge_vport *vport, + struct hclge_respond_to_vf_msg *resp_msg) { #define HCLGE_TQPS_DEPTH_INFO_LEN 4 - u8 resp_data[HCLGE_TQPS_DEPTH_INFO_LEN]; +#define HCLGE_TQPS_NUM_TX_DESC_OFFSET 0 +#define HCLGE_TQPS_NUM_RX_DESC_OFFSET 2 + struct hclge_dev *hdev = vport->back; /* get the queue depth info */ - memcpy(&resp_data[0], &hdev->num_tx_desc, sizeof(u16)); - memcpy(&resp_data[2], &hdev->num_rx_desc, sizeof(u16)); - return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data, - HCLGE_TQPS_DEPTH_INFO_LEN); + memcpy(&resp_msg->data[HCLGE_TQPS_NUM_TX_DESC_OFFSET], + &hdev->num_tx_desc, sizeof(u16)); + memcpy(&resp_msg->data[HCLGE_TQPS_NUM_RX_DESC_OFFSET], + &hdev->num_rx_desc, sizeof(u16)); + resp_msg->len = HCLGE_TQPS_DEPTH_INFO_LEN; } -static int hclge_get_vf_media_type(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req) +static void hclge_get_vf_media_type(struct hclge_vport *vport, + struct hclge_respond_to_vf_msg *resp_msg) { +#define HCLGE_VF_MEDIA_TYPE_OFFSET 0 +#define HCLGE_VF_MODULE_TYPE_OFFSET 1 +#define HCLGE_VF_MEDIA_TYPE_LENGTH 2 + struct hclge_dev *hdev = vport->back; - u8 resp_data[2]; - resp_data[0] = hdev->hw.mac.media_type; - resp_data[1] = hdev->hw.mac.module_type; - return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data, - sizeof(resp_data)); + resp_msg->data[HCLGE_VF_MEDIA_TYPE_OFFSET] = + hdev->hw.mac.media_type; + resp_msg->data[HCLGE_VF_MODULE_TYPE_OFFSET] = + hdev->hw.mac.module_type; + resp_msg->len = HCLGE_VF_MEDIA_TYPE_LENGTH; } static int hclge_get_link_info(struct hclge_vport *vport, @@ -529,7 +524,7 @@ static void hclge_get_link_mode(struct hclge_vport *vport, advertising = hdev->hw.mac.advertising[0]; supported = hdev->hw.mac.supported[0]; dest_vfid = mbx_req->mbx_src_vfid; - msg_data[0] = mbx_req->msg[2]; + msg_data[0] = mbx_req->msg.data[0]; send_data = msg_data[0] == HCLGE_SUPPORTED ? supported : advertising; @@ -543,29 +538,22 @@ static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport, { u16 queue_id; - memcpy(&queue_id, &mbx_req->msg[2], sizeof(queue_id)); + memcpy(&queue_id, mbx_req->msg.data, sizeof(queue_id)); hclge_reset_vf_queue(vport, queue_id); - - /* send response msg to VF after queue reset complete */ - hclge_gen_resp_to_vf(vport, mbx_req, 0, NULL, 0); } -static void hclge_reset_vf(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req) +static int hclge_reset_vf(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; - int ret; dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %u!", vport->vport_id); - ret = hclge_func_reset_cmd(hdev, vport->vport_id); - hclge_gen_resp_to_vf(vport, mbx_req, ret, NULL, 0); + return hclge_func_reset_cmd(hdev, vport->vport_id); } -static void hclge_vf_keep_alive(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req) +static void hclge_vf_keep_alive(struct hclge_vport *vport) { vport->last_active_jiffies = jiffies; } @@ -573,45 +561,39 @@ static void hclge_vf_keep_alive(struct hclge_vport *vport, static int hclge_set_vf_mtu(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *mbx_req) { - int ret; u32 mtu; - memcpy(&mtu, &mbx_req->msg[2], sizeof(mtu)); - ret = hclge_set_vport_mtu(vport, mtu); + memcpy(&mtu, mbx_req->msg.data, sizeof(mtu)); - return hclge_gen_resp_to_vf(vport, mbx_req, ret, NULL, 0); + return hclge_set_vport_mtu(vport, mtu); } -static int hclge_get_queue_id_in_pf(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req) +static void hclge_get_queue_id_in_pf(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *mbx_req, + struct hclge_respond_to_vf_msg *resp_msg) { u16 queue_id, qid_in_pf; - u8 resp_data[2]; - memcpy(&queue_id, &mbx_req->msg[2], sizeof(queue_id)); + memcpy(&queue_id, mbx_req->msg.data, sizeof(queue_id)); qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id); - memcpy(resp_data, &qid_in_pf, sizeof(qid_in_pf)); - - return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data, - sizeof(resp_data)); + memcpy(resp_msg->data, &qid_in_pf, sizeof(qid_in_pf)); + resp_msg->len = sizeof(qid_in_pf); } -static int hclge_get_rss_key(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req) +static void hclge_get_rss_key(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *mbx_req, + struct hclge_respond_to_vf_msg *resp_msg) { #define HCLGE_RSS_MBX_RESP_LEN 8 - u8 resp_data[HCLGE_RSS_MBX_RESP_LEN]; struct hclge_dev *hdev = vport->back; u8 index; - index = mbx_req->msg[2]; + index = mbx_req->msg.data[0]; - memcpy(&resp_data[0], + memcpy(resp_msg->data, &hdev->vport[0].rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN], HCLGE_RSS_MBX_RESP_LEN); - - return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data, - HCLGE_RSS_MBX_RESP_LEN); + resp_msg->len = HCLGE_RSS_MBX_RESP_LEN; } static void hclge_link_fail_parse(struct hclge_dev *hdev, u8 link_fail_code) @@ -634,13 +616,10 @@ static void hclge_link_fail_parse(struct hclge_dev *hdev, u8 link_fail_code) static void hclge_handle_link_change_event(struct hclge_dev *hdev, struct hclge_mbx_vf_to_pf_cmd *req) { -#define LINK_STATUS_OFFSET 1 -#define LINK_FAIL_CODE_OFFSET 2 - hclge_task_schedule(hdev, 0); - if (!req->msg[LINK_STATUS_OFFSET]) - hclge_link_fail_parse(hdev, req->msg[LINK_FAIL_CODE_OFFSET]); + if (!req->msg.subcode) + hclge_link_fail_parse(hdev, req->msg.data[0]); } static bool hclge_cmd_crq_empty(struct hclge_hw *hw) @@ -662,12 +641,14 @@ static void hclge_handle_ncsi_error(struct hclge_dev *hdev) void hclge_mbx_handler(struct hclge_dev *hdev) { struct hclge_cmq_ring *crq = &hdev->hw.cmq.crq; + struct hclge_respond_to_vf_msg resp_msg; struct hclge_mbx_vf_to_pf_cmd *req; struct hclge_vport *vport; struct hclge_desc *desc; unsigned int flag; - int ret; + int ret = 0; + memset(&resp_msg, 0, sizeof(resp_msg)); /* handle all the mailbox requests in the queue */ while (!hclge_cmd_crq_empty(&hdev->hw)) { if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) { @@ -683,7 +664,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev) if (unlikely(!hnae3_get_bit(flag, HCLGE_CMDQ_RX_OUTVLD_B))) { dev_warn(&hdev->pdev->dev, "dropped invalid mailbox message, code = %u\n", - req->msg[0]); + req->msg.code); /* dropping/not processing this invalid message */ crq->desc[crq->next_to_use].flag = 0; @@ -693,7 +674,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev) vport = &hdev->vport[req->mbx_src_vfid]; - switch (req->msg[0]) { + switch (req->msg.code) { case HCLGE_MBX_MAP_RING_TO_VECTOR: ret = hclge_map_unmap_ring_to_vf_vector(vport, true, req); @@ -717,47 +698,34 @@ void hclge_mbx_handler(struct hclge_dev *hdev) ret); break; case HCLGE_MBX_SET_MULTICAST: - ret = hclge_set_vf_mc_mac_addr(vport, req, false); + ret = hclge_set_vf_mc_mac_addr(vport, req); if (ret) dev_err(&hdev->pdev->dev, "PF fail(%d) to set VF MC MAC Addr\n", ret); break; case HCLGE_MBX_SET_VLAN: - ret = hclge_set_vf_vlan_cfg(vport, req); + ret = hclge_set_vf_vlan_cfg(vport, req, &resp_msg); if (ret) dev_err(&hdev->pdev->dev, "PF failed(%d) to config VF's VLAN\n", ret); break; case HCLGE_MBX_SET_ALIVE: - ret = hclge_set_vf_alive(vport, req, false); + ret = hclge_set_vf_alive(vport, req); if (ret) dev_err(&hdev->pdev->dev, "PF failed(%d) to set VF's ALIVE\n", ret); break; case HCLGE_MBX_GET_QINFO: - ret = hclge_get_vf_queue_info(vport, req, true); - if (ret) - dev_err(&hdev->pdev->dev, - "PF failed(%d) to get Q info for VF\n", - ret); + hclge_get_vf_queue_info(vport, &resp_msg); break; case HCLGE_MBX_GET_QDEPTH: - ret = hclge_get_vf_queue_depth(vport, req, true); - if (ret) - dev_err(&hdev->pdev->dev, - "PF failed(%d) to get Q depth for VF\n", - ret); + hclge_get_vf_queue_depth(vport, &resp_msg); break; - case HCLGE_MBX_GET_TCINFO: - ret = hclge_get_vf_tcinfo(vport, req, true); - if (ret) - dev_err(&hdev->pdev->dev, - "PF failed(%d) to get TC info for VF\n", - ret); + hclge_get_vf_tcinfo(vport, &resp_msg); break; case HCLGE_MBX_GET_LINK_STATUS: ret = hclge_get_link_info(vport, req); @@ -770,10 +738,10 @@ void hclge_mbx_handler(struct hclge_dev *hdev) hclge_mbx_reset_vf_queue(vport, req); break; case HCLGE_MBX_RESET: - hclge_reset_vf(vport, req); + ret = hclge_reset_vf(vport); break; case HCLGE_MBX_KEEP_ALIVE: - hclge_vf_keep_alive(vport, req); + hclge_vf_keep_alive(vport); break; case HCLGE_MBX_SET_MTU: ret = hclge_set_vf_mtu(vport, req); @@ -782,23 +750,16 @@ void hclge_mbx_handler(struct hclge_dev *hdev) "VF fail(%d) to set mtu\n", ret); break; case HCLGE_MBX_GET_QID_IN_PF: - ret = hclge_get_queue_id_in_pf(vport, req); - if (ret) - dev_err(&hdev->pdev->dev, - "PF failed(%d) to get qid for VF\n", - ret); + hclge_get_queue_id_in_pf(vport, req, &resp_msg); break; case HCLGE_MBX_GET_RSS_KEY: - ret = hclge_get_rss_key(vport, req); - if (ret) - dev_err(&hdev->pdev->dev, - "PF fail(%d) to get rss key for VF\n", - ret); + hclge_get_rss_key(vport, req, &resp_msg); break; case HCLGE_MBX_GET_LINK_MODE: hclge_get_link_mode(vport, req); break; case HCLGE_MBX_GET_VF_FLR_STATUS: + case HCLGE_MBX_VF_UNINIT: hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_UC); hclge_rm_vport_all_mac_table(vport, true, @@ -806,21 +767,13 @@ void hclge_mbx_handler(struct hclge_dev *hdev) hclge_rm_vport_all_vlan_table(vport, true); break; case HCLGE_MBX_GET_MEDIA_TYPE: - ret = hclge_get_vf_media_type(vport, req); - if (ret) - dev_err(&hdev->pdev->dev, - "PF fail(%d) to media type for VF\n", - ret); + hclge_get_vf_media_type(vport, &resp_msg); break; case HCLGE_MBX_PUSH_LINK_STATUS: hclge_handle_link_change_event(hdev, req); break; case HCLGE_MBX_GET_MAC_ADDR: - ret = hclge_get_vf_mac_addr(vport, req); - if (ret) - dev_err(&hdev->pdev->dev, - "PF failed(%d) to get MAC for VF\n", - ret); + hclge_get_vf_mac_addr(vport, &resp_msg); break; case HCLGE_MBX_NCSI_ERROR: hclge_handle_ncsi_error(hdev); @@ -828,11 +781,22 @@ void hclge_mbx_handler(struct hclge_dev *hdev) default: dev_err(&hdev->pdev->dev, "un-supported mailbox message, code = %u\n", - req->msg[0]); + req->msg.code); break; } + + /* PF driver should not reply IMP */ + if (hnae3_get_bit(req->mbx_need_resp, HCLGE_MBX_NEED_RESP_B) && + req->msg.code < HCLGE_MBX_GET_VF_FLR_STATUS) { + resp_msg.status = ret; + hclge_gen_resp_to_vf(vport, req, &resp_msg); + } + crq->desc[crq->next_to_use].flag = 0; hclge_mbx_ring_ptr_move_crq(crq); + + /* reinitialize ret after complete the mbx message processing */ + ret = 0; } /* Write back CMDQ_RQ header pointer, M7 need this pointer */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index f199a2383a75..2c1d01fe05a4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -229,13 +229,25 @@ static void hclgevf_get_stats(struct hnae3_handle *handle, u64 *data) hclgevf_tqps_get_stats(handle, data); } +static void hclgevf_build_send_msg(struct hclge_vf_to_pf_msg *msg, u8 code, + u8 subcode) +{ + if (msg) { + memset(msg, 0, sizeof(struct hclge_vf_to_pf_msg)); + msg->code = code; + msg->subcode = subcode; + } +} + static int hclgevf_get_tc_info(struct hclgevf_dev *hdev) { + struct hclge_vf_to_pf_msg send_msg; u8 resp_msg; int status; - status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_TCINFO, 0, NULL, 0, - true, &resp_msg, sizeof(resp_msg)); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_GET_TCINFO, 0); + status = hclgevf_send_mbx_msg(hdev, &send_msg, true, &resp_msg, + sizeof(resp_msg)); if (status) { dev_err(&hdev->pdev->dev, "VF request to get TC info from PF failed %d", @@ -251,12 +263,14 @@ static int hclgevf_get_tc_info(struct hclgevf_dev *hdev) static int hclgevf_get_port_base_vlan_filter_state(struct hclgevf_dev *hdev) { struct hnae3_handle *nic = &hdev->nic; + struct hclge_vf_to_pf_msg send_msg; u8 resp_msg; int ret; - ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN, - HCLGE_MBX_GET_PORT_BASE_VLAN_STATE, - NULL, 0, true, &resp_msg, sizeof(u8)); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, + HCLGE_MBX_GET_PORT_BASE_VLAN_STATE); + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, &resp_msg, + sizeof(u8)); if (ret) { dev_err(&hdev->pdev->dev, "VF request to get port based vlan state failed %d", @@ -272,11 +286,16 @@ static int hclgevf_get_port_base_vlan_filter_state(struct hclgevf_dev *hdev) static int hclgevf_get_queue_info(struct hclgevf_dev *hdev) { #define HCLGEVF_TQPS_RSS_INFO_LEN 6 +#define HCLGEVF_TQPS_ALLOC_OFFSET 0 +#define HCLGEVF_TQPS_RSS_SIZE_OFFSET 2 +#define HCLGEVF_TQPS_RX_BUFFER_LEN_OFFSET 4 + u8 resp_msg[HCLGEVF_TQPS_RSS_INFO_LEN]; + struct hclge_vf_to_pf_msg send_msg; int status; - status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_QINFO, 0, NULL, 0, - true, resp_msg, + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_GET_QINFO, 0); + status = hclgevf_send_mbx_msg(hdev, &send_msg, true, resp_msg, HCLGEVF_TQPS_RSS_INFO_LEN); if (status) { dev_err(&hdev->pdev->dev, @@ -285,9 +304,12 @@ static int hclgevf_get_queue_info(struct hclgevf_dev *hdev) return status; } - memcpy(&hdev->num_tqps, &resp_msg[0], sizeof(u16)); - memcpy(&hdev->rss_size_max, &resp_msg[2], sizeof(u16)); - memcpy(&hdev->rx_buf_len, &resp_msg[4], sizeof(u16)); + memcpy(&hdev->num_tqps, &resp_msg[HCLGEVF_TQPS_ALLOC_OFFSET], + sizeof(u16)); + memcpy(&hdev->rss_size_max, &resp_msg[HCLGEVF_TQPS_RSS_SIZE_OFFSET], + sizeof(u16)); + memcpy(&hdev->rx_buf_len, &resp_msg[HCLGEVF_TQPS_RX_BUFFER_LEN_OFFSET], + sizeof(u16)); return 0; } @@ -295,11 +317,15 @@ static int hclgevf_get_queue_info(struct hclgevf_dev *hdev) static int hclgevf_get_queue_depth(struct hclgevf_dev *hdev) { #define HCLGEVF_TQPS_DEPTH_INFO_LEN 4 +#define HCLGEVF_TQPS_NUM_TX_DESC_OFFSET 0 +#define HCLGEVF_TQPS_NUM_RX_DESC_OFFSET 2 + u8 resp_msg[HCLGEVF_TQPS_DEPTH_INFO_LEN]; + struct hclge_vf_to_pf_msg send_msg; int ret; - ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_QDEPTH, 0, NULL, 0, - true, resp_msg, + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_GET_QDEPTH, 0); + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, resp_msg, HCLGEVF_TQPS_DEPTH_INFO_LEN); if (ret) { dev_err(&hdev->pdev->dev, @@ -308,8 +334,10 @@ static int hclgevf_get_queue_depth(struct hclgevf_dev *hdev) return ret; } - memcpy(&hdev->num_tx_desc, &resp_msg[0], sizeof(u16)); - memcpy(&hdev->num_rx_desc, &resp_msg[2], sizeof(u16)); + memcpy(&hdev->num_tx_desc, &resp_msg[HCLGEVF_TQPS_NUM_TX_DESC_OFFSET], + sizeof(u16)); + memcpy(&hdev->num_rx_desc, &resp_msg[HCLGEVF_TQPS_NUM_RX_DESC_OFFSET], + sizeof(u16)); return 0; } @@ -317,14 +345,14 @@ static int hclgevf_get_queue_depth(struct hclgevf_dev *hdev) static u16 hclgevf_get_qid_global(struct hnae3_handle *handle, u16 queue_id) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - u8 msg_data[2], resp_data[2]; + struct hclge_vf_to_pf_msg send_msg; u16 qid_in_pf = 0; + u8 resp_data[2]; int ret; - memcpy(&msg_data[0], &queue_id, sizeof(queue_id)); - - ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_QID_IN_PF, 0, msg_data, - sizeof(msg_data), true, resp_data, + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_GET_QID_IN_PF, 0); + memcpy(send_msg.data, &queue_id, sizeof(queue_id)); + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, resp_data, sizeof(resp_data)); if (!ret) qid_in_pf = *(u16 *)resp_data; @@ -334,11 +362,13 @@ static u16 hclgevf_get_qid_global(struct hnae3_handle *handle, u16 queue_id) static int hclgevf_get_pf_media_type(struct hclgevf_dev *hdev) { + struct hclge_vf_to_pf_msg send_msg; u8 resp_msg[2]; int ret; - ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_MEDIA_TYPE, 0, NULL, 0, - true, resp_msg, sizeof(resp_msg)); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_GET_MEDIA_TYPE, 0); + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, resp_msg, + sizeof(resp_msg)); if (ret) { dev_err(&hdev->pdev->dev, "VF request to get the pf port media type failed %d", @@ -425,11 +455,11 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev) static void hclgevf_request_link_info(struct hclgevf_dev *hdev) { + struct hclge_vf_to_pf_msg send_msg; int status; - u8 resp_msg; - status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_STATUS, 0, NULL, - 0, false, &resp_msg, sizeof(resp_msg)); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_GET_LINK_STATUS, 0); + status = hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); if (status) dev_err(&hdev->pdev->dev, "VF failed to fetch link status(%d) from PF", status); @@ -463,19 +493,16 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) static void hclgevf_update_link_mode(struct hclgevf_dev *hdev) { -#define HCLGEVF_ADVERTISING 0 -#define HCLGEVF_SUPPORTED 1 - u8 send_msg; - u8 resp_msg; +#define HCLGEVF_ADVERTISING 0 +#define HCLGEVF_SUPPORTED 1 - send_msg = HCLGEVF_ADVERTISING; - hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0, - &send_msg, sizeof(send_msg), false, - &resp_msg, sizeof(resp_msg)); - send_msg = HCLGEVF_SUPPORTED; - hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0, - &send_msg, sizeof(send_msg), false, - &resp_msg, sizeof(resp_msg)); + struct hclge_vf_to_pf_msg send_msg; + + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_GET_LINK_MODE, 0); + send_msg.data[0] = HCLGEVF_ADVERTISING; + hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); + send_msg.data[0] = HCLGEVF_SUPPORTED; + hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } static int hclgevf_set_handle_info(struct hclgevf_dev *hdev) @@ -677,19 +704,19 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) static int hclgevf_get_rss_hash_key(struct hclgevf_dev *hdev) { #define HCLGEVF_RSS_MBX_RESP_LEN 8 - struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; u8 resp_msg[HCLGEVF_RSS_MBX_RESP_LEN]; + struct hclge_vf_to_pf_msg send_msg; u16 msg_num, hash_key_index; u8 index; int ret; + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_GET_RSS_KEY, 0); msg_num = (HCLGEVF_RSS_KEY_SIZE + HCLGEVF_RSS_MBX_RESP_LEN - 1) / HCLGEVF_RSS_MBX_RESP_LEN; for (index = 0; index < msg_num; index++) { - ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_RSS_KEY, 0, - &index, sizeof(index), - true, resp_msg, + send_msg.data[0] = index; + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, resp_msg, HCLGEVF_RSS_MBX_RESP_LEN); if (ret) { dev_err(&hdev->pdev->dev, @@ -1001,44 +1028,32 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en, struct hnae3_ring_chain_node *ring_chain) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + struct hclge_vf_to_pf_msg send_msg; struct hnae3_ring_chain_node *node; - struct hclge_mbx_vf_to_pf_cmd *req; - struct hclgevf_desc desc; - int i = 0; int status; - u8 type; + int i = 0; - req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data; - type = en ? HCLGE_MBX_MAP_RING_TO_VECTOR : + memset(&send_msg, 0, sizeof(send_msg)); + send_msg.code = en ? HCLGE_MBX_MAP_RING_TO_VECTOR : HCLGE_MBX_UNMAP_RING_TO_VECTOR; + send_msg.vector_id = vector_id; for (node = ring_chain; node; node = node->next) { - int idx_offset = HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + - HCLGE_MBX_RING_NODE_VARIABLE_NUM * i; - - if (i == 0) { - hclgevf_cmd_setup_basic_desc(&desc, - HCLGEVF_OPC_MBX_VF_TO_PF, - false); - req->msg[0] = type; - req->msg[1] = vector_id; - } - - req->msg[idx_offset] = + send_msg.param[i].ring_type = hnae3_get_bit(node->flag, HNAE3_RING_TYPE_B); - req->msg[idx_offset + 1] = node->tqp_index; - req->msg[idx_offset + 2] = hnae3_get_field(node->int_gl_idx, - HNAE3_RING_GL_IDX_M, - HNAE3_RING_GL_IDX_S); + + send_msg.param[i].tqp_index = node->tqp_index; + send_msg.param[i].int_gl_index = + hnae3_get_field(node->int_gl_idx, + HNAE3_RING_GL_IDX_M, + HNAE3_RING_GL_IDX_S); i++; - if ((i == (HCLGE_MBX_VF_MSG_DATA_NUM - - HCLGE_MBX_RING_MAP_BASIC_MSG_NUM) / - HCLGE_MBX_RING_NODE_VARIABLE_NUM) || - !node->next) { - req->msg[2] = i; + if (i == HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM || !node->next) { + send_msg.ring_num = i; - status = hclgevf_cmd_send(&hdev->hw, &desc, 1); + status = hclgevf_send_mbx_msg(hdev, &send_msg, false, + NULL, 0); if (status) { dev_err(&hdev->pdev->dev, "Map TQP fail, status is %d.\n", @@ -1046,11 +1061,6 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en, return status; } i = 0; - hclgevf_cmd_setup_basic_desc(&desc, - HCLGEVF_OPC_MBX_VF_TO_PF, - false); - req->msg[0] = type; - req->msg[1] = vector_id; } } @@ -1123,18 +1133,17 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, bool en_uc_pmc, bool en_mc_pmc, bool en_bc_pmc) { - struct hclge_mbx_vf_to_pf_cmd *req; - struct hclgevf_desc desc; + struct hclge_vf_to_pf_msg send_msg; int ret; - req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data; - hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false); - req->msg[0] = HCLGE_MBX_SET_PROMISC_MODE; - req->msg[1] = en_bc_pmc ? 1 : 0; - req->msg[2] = en_uc_pmc ? 1 : 0; - req->msg[3] = en_mc_pmc ? 1 : 0; + memset(&send_msg, 0, sizeof(send_msg)); + send_msg.code = HCLGE_MBX_SET_PROMISC_MODE; + send_msg.en_bc = en_bc_pmc ? 1 : 0; + send_msg.en_uc = en_uc_pmc ? 1 : 0; + send_msg.en_mc = en_mc_pmc ? 1 : 0; + + ret = hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); - ret = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (ret) dev_err(&hdev->pdev->dev, "Set promisc mode fail, status is %d.\n", ret); @@ -1193,11 +1202,13 @@ static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle) static int hclgevf_get_host_mac_addr(struct hclgevf_dev *hdev, u8 *p) { + struct hclge_vf_to_pf_msg send_msg; u8 host_mac[ETH_ALEN]; int status; - status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_MAC_ADDR, 0, NULL, 0, - true, host_mac, ETH_ALEN); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_GET_MAC_ADDR, 0); + status = hclgevf_send_mbx_msg(hdev, &send_msg, true, host_mac, + ETH_ALEN); if (status) { dev_err(&hdev->pdev->dev, "fail to get VF MAC from host %d", status); @@ -1229,20 +1240,16 @@ static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p, { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); u8 *old_mac_addr = (u8 *)hdev->hw.mac.mac_addr; + struct hclge_vf_to_pf_msg send_msg; u8 *new_mac_addr = (u8 *)p; - u8 msg_data[ETH_ALEN * 2]; - u16 subcode; int status; - ether_addr_copy(msg_data, new_mac_addr); - ether_addr_copy(&msg_data[ETH_ALEN], old_mac_addr); - - subcode = is_first ? HCLGE_MBX_MAC_VLAN_UC_ADD : + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_UNICAST, 0); + send_msg.subcode = is_first ? HCLGE_MBX_MAC_VLAN_UC_ADD : HCLGE_MBX_MAC_VLAN_UC_MODIFY; - - status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST, - subcode, msg_data, sizeof(msg_data), - true, NULL, 0); + ether_addr_copy(send_msg.data, new_mac_addr); + ether_addr_copy(&send_msg.data[ETH_ALEN], old_mac_addr); + status = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); if (!status) ether_addr_copy(hdev->hw.mac.mac_addr, new_mac_addr); @@ -1253,49 +1260,60 @@ static int hclgevf_add_uc_addr(struct hnae3_handle *handle, const unsigned char *addr) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + struct hclge_vf_to_pf_msg send_msg; - return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST, - HCLGE_MBX_MAC_VLAN_UC_ADD, - addr, ETH_ALEN, false, NULL, 0); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_UNICAST, + HCLGE_MBX_MAC_VLAN_UC_ADD); + ether_addr_copy(send_msg.data, addr); + return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } static int hclgevf_rm_uc_addr(struct hnae3_handle *handle, const unsigned char *addr) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + struct hclge_vf_to_pf_msg send_msg; - return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST, - HCLGE_MBX_MAC_VLAN_UC_REMOVE, - addr, ETH_ALEN, false, NULL, 0); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_UNICAST, + HCLGE_MBX_MAC_VLAN_UC_REMOVE); + ether_addr_copy(send_msg.data, addr); + return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } static int hclgevf_add_mc_addr(struct hnae3_handle *handle, const unsigned char *addr) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + struct hclge_vf_to_pf_msg send_msg; - return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_MULTICAST, - HCLGE_MBX_MAC_VLAN_MC_ADD, - addr, ETH_ALEN, false, NULL, 0); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_MULTICAST, + HCLGE_MBX_MAC_VLAN_MC_ADD); + ether_addr_copy(send_msg.data, addr); + return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } static int hclgevf_rm_mc_addr(struct hnae3_handle *handle, const unsigned char *addr) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + struct hclge_vf_to_pf_msg send_msg; - return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_MULTICAST, - HCLGE_MBX_MAC_VLAN_MC_REMOVE, - addr, ETH_ALEN, false, NULL, 0); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_MULTICAST, + HCLGE_MBX_MAC_VLAN_MC_REMOVE); + ether_addr_copy(send_msg.data, addr); + return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } static int hclgevf_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, u16 vlan_id, bool is_kill) { -#define HCLGEVF_VLAN_MBX_MSG_LEN 5 +#define HCLGEVF_VLAN_MBX_IS_KILL_OFFSET 0 +#define HCLGEVF_VLAN_MBX_VLAN_ID_OFFSET 1 +#define HCLGEVF_VLAN_MBX_PROTO_OFFSET 3 + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - u8 msg_data[HCLGEVF_VLAN_MBX_MSG_LEN]; + struct hclge_vf_to_pf_msg send_msg; int ret; if (vlan_id > HCLGEVF_MAX_VLAN_ID) @@ -1313,16 +1331,18 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle, return -EBUSY; } - msg_data[0] = is_kill; - memcpy(&msg_data[1], &vlan_id, sizeof(vlan_id)); - memcpy(&msg_data[3], &proto, sizeof(proto)); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, + HCLGE_MBX_VLAN_FILTER); + send_msg.data[HCLGEVF_VLAN_MBX_IS_KILL_OFFSET] = is_kill; + memcpy(&send_msg.data[HCLGEVF_VLAN_MBX_VLAN_ID_OFFSET], &vlan_id, + sizeof(vlan_id)); + memcpy(&send_msg.data[HCLGEVF_VLAN_MBX_PROTO_OFFSET], &proto, + sizeof(proto)); /* when remove hw vlan filter failed, record the vlan id, * and try to remove it from hw later, to be consistence * with stack. */ - ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN, - HCLGE_MBX_VLAN_FILTER, msg_data, - HCLGEVF_VLAN_MBX_MSG_LEN, true, NULL, 0); + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); if (is_kill && ret) set_bit(vlan_id, hdev->vlan_del_fail_bmap); @@ -1355,37 +1375,38 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev) static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - u8 msg_data; + struct hclge_vf_to_pf_msg send_msg; - msg_data = enable ? 1 : 0; - return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN, - HCLGE_MBX_VLAN_RX_OFF_CFG, &msg_data, - 1, false, NULL, 0); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, + HCLGE_MBX_VLAN_RX_OFF_CFG); + send_msg.data[0] = enable ? 1 : 0; + return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - u8 msg_data[2]; + struct hclge_vf_to_pf_msg send_msg; int ret; - memcpy(msg_data, &queue_id, sizeof(queue_id)); - /* disable vf queue before send queue reset msg to PF */ ret = hclgevf_tqp_enable(hdev, queue_id, 0, false); if (ret) return ret; - return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data, - sizeof(msg_data), true, NULL, 0); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0); + memcpy(send_msg.data, &queue_id, sizeof(queue_id)); + return hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); } static int hclgevf_set_mtu(struct hnae3_handle *handle, int new_mtu) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + struct hclge_vf_to_pf_msg send_msg; - return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_MTU, 0, (u8 *)&new_mtu, - sizeof(new_mtu), true, NULL, 0); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_MTU, 0); + memcpy(send_msg.data, &new_mtu, sizeof(new_mtu)); + return hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); } static int hclgevf_notify_client(struct hclgevf_dev *hdev, @@ -1500,11 +1521,12 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) { #define HCLGEVF_RESET_SYNC_TIME 100 + struct hclge_vf_to_pf_msg send_msg; int ret = 0; if (hdev->reset_type == HNAE3_VF_FUNC_RESET) { - ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL, - 0, true, NULL, sizeof(u8)); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_RESET, 0); + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); hdev->rst_stats.vf_func_rst_cnt++; } @@ -1881,14 +1903,14 @@ static void hclgevf_mailbox_service_task(struct hclgevf_dev *hdev) static void hclgevf_keep_alive(struct hclgevf_dev *hdev) { - u8 respmsg; + struct hclge_vf_to_pf_msg send_msg; int ret; if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) return; - ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_KEEP_ALIVE, 0, NULL, - 0, false, &respmsg, sizeof(respmsg)); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_KEEP_ALIVE, 0); + ret = hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); if (ret) dev_err(&hdev->pdev->dev, "VF sends keep alive cmd failed(=%d)\n", ret); @@ -2245,12 +2267,16 @@ static void hclgevf_ae_stop(struct hnae3_handle *handle) static int hclgevf_set_alive(struct hnae3_handle *handle, bool alive) { +#define HCLGEVF_STATE_ALIVE 1 +#define HCLGEVF_STATE_NOT_ALIVE 0 + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - u8 msg_data; + struct hclge_vf_to_pf_msg send_msg; - msg_data = alive ? 1 : 0; - return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_ALIVE, - 0, &msg_data, 1, false, NULL, 0); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_ALIVE, 0); + send_msg.data[0] = alive ? HCLGEVF_STATE_ALIVE : + HCLGEVF_STATE_NOT_ALIVE; + return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } static int hclgevf_client_start(struct hnae3_handle *handle) @@ -2804,8 +2830,13 @@ err_cmd_queue_init: static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev) { + struct hclge_vf_to_pf_msg send_msg; + hclgevf_state_uninit(hdev); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_VF_UNINIT, 0); + hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); + if (test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) { hclgevf_misc_irq_uninit(hdev); hclgevf_uninit_msi(hdev); @@ -3101,16 +3132,17 @@ void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state, u8 *port_base_vlan_info, u8 data_size) { struct hnae3_handle *nic = &hdev->nic; + struct hclge_vf_to_pf_msg send_msg; rtnl_lock(); hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT); rtnl_unlock(); /* send msg to PF and wait update port based vlan info */ - hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN, - HCLGE_MBX_PORT_BASE_VLAN_CFG, - port_base_vlan_info, data_size, - false, NULL, 0); + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, + HCLGE_MBX_PORT_BASE_VLAN_CFG); + memcpy(send_msg.data, port_base_vlan_info, data_size); + hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); if (state == HNAE3_PORT_BASE_VLAN_DISABLE) nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_DISABLE; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index fee8d97f323c..3b88d866facc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -305,8 +305,8 @@ static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev) return !!hdev->reset_pending; } -int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode, - const u8 *msg_data, u8 msg_len, bool need_resp, +int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, + struct hclge_vf_to_pf_msg *send_msg, bool need_resp, u8 *resp_data, u16 resp_len); void hclgevf_mbx_handler(struct hclgevf_dev *hdev); void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index 7cbd715d5e7a..9b8154955f91 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -5,6 +5,11 @@ #include "hclgevf_main.h" #include "hnae3.h" +static int hclgevf_resp_to_errno(u16 resp_code) +{ + return resp_code ? -resp_code : 0; +} + static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev) { /* this function should be called with mbx_resp.mbx_mutex held @@ -79,8 +84,8 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1, return 0; } -int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode, - const u8 *msg_data, u8 msg_len, bool need_resp, +int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, + struct hclge_vf_to_pf_msg *send_msg, bool need_resp, u8 *resp_data, u16 resp_len) { struct hclge_mbx_vf_to_pf_cmd *req; @@ -89,21 +94,17 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode, req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data; - /* first two bytes are reserved for code & subcode */ - if (msg_len > (HCLGE_MBX_MAX_MSG_SIZE - 2)) { + if (!send_msg) { dev_err(&hdev->pdev->dev, - "VF send mbx msg fail, msg len %d exceeds max len %d\n", - msg_len, HCLGE_MBX_MAX_MSG_SIZE); + "failed to send mbx, msg is NULL\n"); return -EINVAL; } hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false); - req->mbx_need_resp |= need_resp ? HCLGE_MBX_NEED_RESP_BIT : - ~HCLGE_MBX_NEED_RESP_BIT; - req->msg[0] = code; - req->msg[1] = subcode; - if (msg_data) - memcpy(&req->msg[2], msg_data, msg_len); + if (need_resp) + hnae3_set_bit(req->mbx_need_resp, HCLGE_MBX_NEED_RESP_B, 1); + + memcpy(&req->msg, send_msg, sizeof(struct hclge_vf_to_pf_msg)); /* synchronous send */ if (need_resp) { @@ -118,7 +119,8 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode, return status; } - status = hclgevf_get_mbx_resp(hdev, code, subcode, resp_data, + status = hclgevf_get_mbx_resp(hdev, send_msg->code, + send_msg->subcode, resp_data, resp_len); mutex_unlock(&hdev->mbx_resp.mbx_mutex); } else { @@ -169,7 +171,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) if (unlikely(!hnae3_get_bit(flag, HCLGEVF_CMDQ_RX_OUTVLD_B))) { dev_warn(&hdev->pdev->dev, "dropped invalid mailbox message, code = %u\n", - req->msg[0]); + req->msg.code); /* dropping/not processing this invalid message */ crq->desc[crq->next_to_use].flag = 0; @@ -183,19 +185,21 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) * timeout and simultaneously queue the async messages for later * prcessing in context of mailbox task i.e. the slow path. */ - switch (req->msg[0]) { + switch (req->msg.code) { case HCLGE_MBX_PF_VF_RESP: if (resp->received_resp) dev_warn(&hdev->pdev->dev, "VF mbx resp flag not clear(%u)\n", - req->msg[1]); + req->msg.vf_mbx_msg_code); resp->received_resp = true; - resp->origin_mbx_msg = (req->msg[1] << 16); - resp->origin_mbx_msg |= req->msg[2]; - resp->resp_status = req->msg[3]; + resp->origin_mbx_msg = + (req->msg.vf_mbx_msg_code << 16); + resp->origin_mbx_msg |= req->msg.vf_mbx_msg_subcode; + resp->resp_status = + hclgevf_resp_to_errno(req->msg.resp_status); - temp = (u8 *)&req->msg[4]; + temp = (u8 *)req->msg.resp_data; for (i = 0; i < HCLGE_MBX_MAX_RESP_DATA_SIZE; i++) { resp->additional_info[i] = *temp; temp++; @@ -220,13 +224,13 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) HCLGE_MBX_MAX_ARQ_MSG_NUM) { dev_warn(&hdev->pdev->dev, "Async Q full, dropping msg(%u)\n", - req->msg[1]); + req->msg.code); break; } /* tail the async message in arq */ msg_q = hdev->arq.msg_q[hdev->arq.tail]; - memcpy(&msg_q[0], req->msg, + memcpy(&msg_q[0], &req->msg, HCLGE_MBX_MAX_ARQ_MSG_SIZE * sizeof(u16)); hclge_mbx_tail_ptr_move_arq(hdev->arq); atomic_inc(&hdev->arq.count); @@ -237,7 +241,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) default: dev_err(&hdev->pdev->dev, "VF received unsupported(%u) mbx msg from PF\n", - req->msg[0]); + req->msg.code); break; } crq->desc[crq->next_to_use].flag = 0; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c75239d8820f..4bd33245bad6 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2142,6 +2142,8 @@ static void __ibmvnic_reset(struct work_struct *work) { struct ibmvnic_rwi *rwi; struct ibmvnic_adapter *adapter; + bool saved_state = false; + unsigned long flags; u32 reset_state; int rc = 0; @@ -2153,17 +2155,25 @@ static void __ibmvnic_reset(struct work_struct *work) return; } - reset_state = adapter->state; - rwi = get_next_rwi(adapter); while (rwi) { + spin_lock_irqsave(&adapter->state_lock, flags); + if (adapter->state == VNIC_REMOVING || adapter->state == VNIC_REMOVED) { + spin_unlock_irqrestore(&adapter->state_lock, flags); kfree(rwi); rc = EBUSY; break; } + if (!saved_state) { + reset_state = adapter->state; + adapter->state = VNIC_RESETTING; + saved_state = true; + } + spin_unlock_irqrestore(&adapter->state_lock, flags); + if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) { /* CHANGE_PARAM requestor holds rtnl_lock */ rc = do_change_param_reset(adapter, rwi, reset_state); @@ -5091,6 +5101,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) __ibmvnic_delayed_reset); INIT_LIST_HEAD(&adapter->rwi_list); spin_lock_init(&adapter->rwi_lock); + spin_lock_init(&adapter->state_lock); mutex_init(&adapter->fw_lock); init_completion(&adapter->init_done); init_completion(&adapter->fw_done); @@ -5163,8 +5174,17 @@ static int ibmvnic_remove(struct vio_dev *dev) { struct net_device *netdev = dev_get_drvdata(&dev->dev); struct ibmvnic_adapter *adapter = netdev_priv(netdev); + unsigned long flags; + + spin_lock_irqsave(&adapter->state_lock, flags); + if (adapter->state == VNIC_RESETTING) { + spin_unlock_irqrestore(&adapter->state_lock, flags); + return -EBUSY; + } adapter->state = VNIC_REMOVING; + spin_unlock_irqrestore(&adapter->state_lock, flags); + rtnl_lock(); unregister_netdevice(netdev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 60eccaf91b12..f8416e1d4cf0 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -941,7 +941,8 @@ enum vnic_state {VNIC_PROBING = 1, VNIC_CLOSING, VNIC_CLOSED, VNIC_REMOVING, - VNIC_REMOVED}; + VNIC_REMOVED, + VNIC_RESETTING}; enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1, VNIC_RESET_MOBILITY, @@ -1090,4 +1091,7 @@ struct ibmvnic_adapter { struct ibmvnic_tunables desired; struct ibmvnic_tunables fallback; + + /* Used for serializatin of state field */ + spinlock_t state_lock; }; diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 154e2e818ec6..ad34e4335df2 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -294,6 +294,7 @@ config ICE tristate "Intel(R) Ethernet Connection E800 Series Support" default n depends on PCI_MSI + select NET_DEVLINK ---help--- This driver supports Intel(R) Ethernet Connection E800 Series of devices. For more information on how to identify your adapter, go diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index be56e631d693..6f45df5690d4 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -1852,6 +1852,7 @@ static void e1000_get_strings(struct net_device *netdev, u32 stringset, } static const struct ethtool_ops e1000_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = e1000_get_drvinfo, .get_regs_len = e1000_get_regs_len, .get_regs = e1000_get_regs, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index 68edf55ac906..37fbc646deb9 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -1151,6 +1151,8 @@ static int fm10k_set_channels(struct net_device *dev, } static const struct ethtool_ops fm10k_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_strings = fm10k_get_strings, .get_sset_count = fm10k_get_sset_count, .get_ethtool_stats = fm10k_get_ethtool_stats, diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 317f3f1458db..aa8026b1eb81 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -5249,6 +5249,11 @@ static const struct ethtool_ops i40e_ethtool_recovery_mode_ops = { }; static const struct ethtool_ops i40e_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE | + ETHTOOL_COALESCE_RX_USECS_HIGH | + ETHTOOL_COALESCE_TX_USECS_HIGH, .get_drvinfo = i40e_get_drvinfo, .get_regs_len = i40e_get_regs_len, .get_regs = i40e_get_regs, diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index f807e2c7597f..2c39d46b6138 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -996,6 +996,10 @@ static int iavf_set_rxfh(struct net_device *netdev, const u32 *indir, } static const struct ethtool_ops iavf_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = iavf_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = iavf_get_ringparam, diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 59544b0fc086..29c6c6743450 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -19,6 +19,7 @@ ice-y := ice_main.o \ ice_txrx.o \ ice_flex_pipe.o \ ice_flow.o \ + ice_devlink.o \ ice_ethtool.o ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index ce73a6a96aac..8ce3afcfeca0 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -34,6 +34,7 @@ #include <linux/ctype.h> #include <linux/bpf.h> #include <linux/avf/virtchnl.h> +#include <net/devlink.h> #include <net/ipv6.h> #include <net/xdp_sock.h> #include "ice_devids.h" @@ -347,6 +348,9 @@ enum ice_pf_flags { struct ice_pf { struct pci_dev *pdev; + /* devlink port data */ + struct devlink_port devlink_port; + /* OS reserved IRQ details */ struct msix_entry *msix_entries; struct ice_res_tracker *irq_tracker; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 38b6ffb6ad2e..2381b4014ed6 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1232,6 +1232,7 @@ struct ice_aqc_sff_eeprom { * NVM Update commands (indirect 0x0703) */ struct ice_aqc_nvm { +#define ICE_AQC_NVM_MAX_OFFSET 0xFFFFFF __le16 offset_low; u8 offset_high; u8 cmd_flags; @@ -1250,6 +1251,8 @@ struct ice_aqc_nvm { __le32 addr_low; }; +#define ICE_AQC_NVM_START_POINT 0 + /* NVM Checksum Command (direct, 0x0706) */ struct ice_aqc_nvm_checksum { u8 flags; @@ -1764,6 +1767,7 @@ enum ice_aq_err { ICE_AQ_RC_ENOMEM = 9, /* Out of memory */ ICE_AQ_RC_EBUSY = 12, /* Device or resource busy */ ICE_AQ_RC_EEXIST = 13, /* Object already exists */ + ICE_AQ_RC_EINVAL = 14, /* Invalid argument */ ICE_AQ_RC_ENOSPC = 16, /* No space left or allocation failure */ ICE_AQ_RC_ENOSYS = 17, /* Function not implemented */ ICE_AQ_RC_ENOSEC = 24, /* Missing security manifest */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index e574a70fcc99..2c0d8fd3d5cd 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -615,29 +615,6 @@ static void ice_get_itr_intrl_gran(struct ice_hw *hw) } /** - * ice_get_nvm_version - get cached NVM version data - * @hw: pointer to the hardware structure - * @oem_ver: 8 bit NVM version - * @oem_build: 16 bit NVM build number - * @oem_patch: 8 NVM patch number - * @ver_hi: high 8 bits of the NVM version - * @ver_lo: low 8 bits of the NVM version - */ -void -ice_get_nvm_version(struct ice_hw *hw, u8 *oem_ver, u16 *oem_build, - u8 *oem_patch, u8 *ver_hi, u8 *ver_lo) -{ - struct ice_nvm_info *nvm = &hw->nvm; - - *oem_ver = (u8)((nvm->oem_ver & ICE_OEM_VER_MASK) >> ICE_OEM_VER_SHIFT); - *oem_patch = (u8)(nvm->oem_ver & ICE_OEM_VER_PATCH_MASK); - *oem_build = (u16)((nvm->oem_ver & ICE_OEM_VER_BUILD_MASK) >> - ICE_OEM_VER_BUILD_SHIFT); - *ver_hi = (nvm->ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT; - *ver_lo = (nvm->ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT; -} - -/** * ice_init_hw - main hardware initialization routine * @hw: pointer to the hardware structure */ @@ -958,72 +935,6 @@ enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req) } /** - * ice_get_pfa_module_tlv - Reads sub module TLV from NVM PFA - * @hw: pointer to hardware structure - * @module_tlv: pointer to module TLV to return - * @module_tlv_len: pointer to module TLV length to return - * @module_type: module type requested - * - * Finds the requested sub module TLV type from the Preserved Field - * Area (PFA) and returns the TLV pointer and length. The caller can - * use these to read the variable length TLV value. - */ -enum ice_status -ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, - u16 module_type) -{ - enum ice_status status; - u16 pfa_len, pfa_ptr; - u16 next_tlv; - - status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr); - if (status) { - ice_debug(hw, ICE_DBG_INIT, "Preserved Field Array pointer.\n"); - return status; - } - status = ice_read_sr_word(hw, pfa_ptr, &pfa_len); - if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n"); - return status; - } - /* Starting with first TLV after PFA length, iterate through the list - * of TLVs to find the requested one. - */ - next_tlv = pfa_ptr + 1; - while (next_tlv < pfa_ptr + pfa_len) { - u16 tlv_sub_module_type; - u16 tlv_len; - - /* Read TLV type */ - status = ice_read_sr_word(hw, next_tlv, &tlv_sub_module_type); - if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV type.\n"); - break; - } - /* Read TLV length */ - status = ice_read_sr_word(hw, next_tlv + 1, &tlv_len); - if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV length.\n"); - break; - } - if (tlv_sub_module_type == module_type) { - if (tlv_len) { - *module_tlv = next_tlv; - *module_tlv_len = tlv_len; - return 0; - } - return ICE_ERR_INVAL_SIZE; - } - /* Check next TLV, i.e. current TLV pointer + length + 2 words - * (for current TLV's type and length) - */ - next_tlv = next_tlv + tlv_len + 2; - } - /* Module does not exist */ - return ICE_ERR_DOES_NOT_EXIST; -} - -/** * ice_copy_rxq_ctx_to_hw * @hw: pointer to the hardware structure * @ice_rxq_ctx: pointer to the rxq context diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index f9fc005d35a7..8104f3d64d96 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -15,9 +15,6 @@ enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw); enum ice_status ice_init_hw(struct ice_hw *hw); void ice_deinit_hw(struct ice_hw *hw); -enum ice_status -ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, - u16 module_type); enum ice_status ice_check_reset(struct ice_hw *hw); enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req); enum ice_status ice_create_all_ctrlq(struct ice_hw *hw); @@ -38,9 +35,6 @@ enum ice_status ice_alloc_hw_res(struct ice_hw *hw, u16 type, u16 num, bool btm, u16 *res); enum ice_status ice_free_hw_res(struct ice_hw *hw, u16 type, u16 num, u16 *res); -enum ice_status ice_init_nvm(struct ice_hw *hw); -enum ice_status -ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data); enum ice_status ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries, struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size, @@ -153,9 +147,6 @@ ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); -void -ice_get_nvm_version(struct ice_hw *hw, u8 *oem_ver, u16 *oem_build, - u8 *oem_patch, u8 *ver_hi, u8 *ver_lo); enum ice_status ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, struct ice_aqc_get_elem *buf); diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c new file mode 100644 index 000000000000..27c5034c039a --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Intel Corporation. */ + +#include "ice.h" +#include "ice_lib.h" +#include "ice_devlink.h" + +static int ice_info_get_dsn(struct ice_pf *pf, char *buf, size_t len) +{ + u8 dsn[8]; + + /* Copy the DSN into an array in Big Endian format */ + put_unaligned_be64(pci_get_dsn(pf->pdev), dsn); + + snprintf(buf, len, "%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x", + dsn[0], dsn[1], dsn[2], dsn[3], + dsn[4], dsn[5], dsn[6], dsn[7]); + + return 0; +} + +static int ice_info_pba(struct ice_pf *pf, char *buf, size_t len) +{ + struct ice_hw *hw = &pf->hw; + enum ice_status status; + + status = ice_read_pba_string(hw, (u8 *)buf, len); + if (status) + return -EIO; + + return 0; +} + +static int ice_info_fw_mgmt(struct ice_pf *pf, char *buf, size_t len) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(buf, len, "%u.%u.%u", hw->fw_maj_ver, hw->fw_min_ver, + hw->fw_patch); + + return 0; +} + +static int ice_info_fw_api(struct ice_pf *pf, char *buf, size_t len) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(buf, len, "%u.%u", hw->api_maj_ver, hw->api_min_ver); + + return 0; +} + +static int ice_info_fw_build(struct ice_pf *pf, char *buf, size_t len) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(buf, len, "0x%08x", hw->fw_build); + + return 0; +} + +static int ice_info_orom_ver(struct ice_pf *pf, char *buf, size_t len) +{ + struct ice_orom_info *orom = &pf->hw.nvm.orom; + + snprintf(buf, len, "%u.%u.%u", orom->major, orom->build, orom->patch); + + return 0; +} + +static int ice_info_nvm_ver(struct ice_pf *pf, char *buf, size_t len) +{ + struct ice_nvm_info *nvm = &pf->hw.nvm; + + snprintf(buf, len, "%x.%02x", nvm->major_ver, nvm->minor_ver); + + return 0; +} + +static int ice_info_eetrack(struct ice_pf *pf, char *buf, size_t len) +{ + struct ice_nvm_info *nvm = &pf->hw.nvm; + + snprintf(buf, len, "0x%08x", nvm->eetrack); + + return 0; +} + +static int ice_info_ddp_pkg_name(struct ice_pf *pf, char *buf, size_t len) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(buf, len, "%s", hw->active_pkg_name); + + return 0; +} + +static int ice_info_ddp_pkg_version(struct ice_pf *pf, char *buf, size_t len) +{ + struct ice_pkg_ver *pkg = &pf->hw.active_pkg_ver; + + snprintf(buf, len, "%u.%u.%u.%u", pkg->major, pkg->minor, pkg->update, + pkg->draft); + + return 0; +} + +#define fixed(key, getter) { ICE_VERSION_FIXED, key, getter } +#define running(key, getter) { ICE_VERSION_RUNNING, key, getter } + +enum ice_version_type { + ICE_VERSION_FIXED, + ICE_VERSION_RUNNING, + ICE_VERSION_STORED, +}; + +static const struct ice_devlink_version { + enum ice_version_type type; + const char *key; + int (*getter)(struct ice_pf *pf, char *buf, size_t len); +} ice_devlink_versions[] = { + fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba), + running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt), + running("fw.mgmt.api", ice_info_fw_api), + running("fw.mgmt.build", ice_info_fw_build), + running(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver), + running("fw.psid.api", ice_info_nvm_ver), + running(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack), + running("fw.app.name", ice_info_ddp_pkg_name), + running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version), +}; + +/** + * ice_devlink_info_get - .info_get devlink handler + * @devlink: devlink instance structure + * @req: the devlink info request + * @extack: extended netdev ack structure + * + * Callback for the devlink .info_get operation. Reports information about the + * device. + * + * Return: zero on success or an error code on failure. + */ +static int ice_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + char buf[100]; + size_t i; + int err; + + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set driver name"); + return err; + } + + err = ice_info_get_dsn(pf, buf, sizeof(buf)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to obtain serial number"); + return err; + } + + err = devlink_info_serial_number_put(req, buf); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set serial number"); + return err; + } + + for (i = 0; i < ARRAY_SIZE(ice_devlink_versions); i++) { + enum ice_version_type type = ice_devlink_versions[i].type; + const char *key = ice_devlink_versions[i].key; + + err = ice_devlink_versions[i].getter(pf, buf, sizeof(buf)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to obtain version info"); + return err; + } + + switch (type) { + case ICE_VERSION_FIXED: + err = devlink_info_version_fixed_put(req, key, buf); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set fixed version"); + return err; + } + break; + case ICE_VERSION_RUNNING: + err = devlink_info_version_running_put(req, key, buf); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set running version"); + return err; + } + break; + case ICE_VERSION_STORED: + err = devlink_info_version_stored_put(req, key, buf); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set stored version"); + return err; + } + break; + } + } + + return 0; +} + +static const struct devlink_ops ice_devlink_ops = { + .info_get = ice_devlink_info_get, +}; + +static void ice_devlink_free(void *devlink_ptr) +{ + devlink_free((struct devlink *)devlink_ptr); +} + +/** + * ice_allocate_pf - Allocate devlink and return PF structure pointer + * @dev: the device to allocate for + * + * Allocate a devlink instance for this device and return the private area as + * the PF structure. The devlink memory is kept track of through devres by + * adding an action to remove it when unwinding. + */ +struct ice_pf *ice_allocate_pf(struct device *dev) +{ + struct devlink *devlink; + + devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf)); + if (!devlink) + return NULL; + + /* Add an action to teardown the devlink when unwinding the driver */ + if (devm_add_action(dev, ice_devlink_free, devlink)) { + devlink_free(devlink); + return NULL; + } + + return devlink_priv(devlink); +} + +/** + * ice_devlink_register - Register devlink interface for this PF + * @pf: the PF to register the devlink for. + * + * Register the devlink instance associated with this physical function. + * + * Return: zero on success or an error code on failure. + */ +int ice_devlink_register(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct device *dev = ice_pf_to_dev(pf); + int err; + + err = devlink_register(devlink, dev); + if (err) { + dev_err(dev, "devlink registration failed: %d\n", err); + return err; + } + + return 0; +} + +/** + * ice_devlink_unregister - Unregister devlink resources for this PF. + * @pf: the PF structure to cleanup + * + * Releases resources used by devlink and cleans up associated memory. + */ +void ice_devlink_unregister(struct ice_pf *pf) +{ + devlink_unregister(priv_to_devlink(pf)); +} + +/** + * ice_devlink_create_port - Create a devlink port for this PF + * @pf: the PF to create a port for + * + * Create and register a devlink_port for this PF. Note that although each + * physical function is connected to a separate devlink instance, the port + * will still be numbered according to the physical function id. + * + * Return: zero on success or an error code on failure. + */ +int ice_devlink_create_port(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct ice_vsi *vsi = ice_get_main_vsi(pf); + struct device *dev = ice_pf_to_dev(pf); + int err; + + if (!vsi) { + dev_err(dev, "%s: unable to find main VSI\n", __func__); + return -EIO; + } + + devlink_port_attrs_set(&pf->devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, + pf->hw.pf_id, false, 0, NULL, 0); + err = devlink_port_register(devlink, &pf->devlink_port, pf->hw.pf_id); + if (err) { + dev_err(dev, "devlink_port_register failed: %d\n", err); + return err; + } + + return 0; +} + +/** + * ice_devlink_destroy_port - Destroy the devlink_port for this PF + * @pf: the PF to cleanup + * + * Unregisters the devlink_port structure associated with this PF. + */ +void ice_devlink_destroy_port(struct ice_pf *pf) +{ + devlink_port_type_clear(&pf->devlink_port); + devlink_port_unregister(&pf->devlink_port); +} diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h new file mode 100644 index 000000000000..f94dc93c24c5 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_devlink.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_DEVLINK_H_ +#define _ICE_DEVLINK_H_ + +struct ice_pf *ice_allocate_pf(struct device *dev); + +int ice_devlink_register(struct ice_pf *pf); +void ice_devlink_unregister(struct ice_pf *pf); +int ice_devlink_create_port(struct ice_pf *pf); +void ice_devlink_destroy_port(struct ice_pf *pf); + +#endif /* _ICE_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index e3d148f12aac..593fb37bd59e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -167,11 +167,14 @@ static void ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { struct ice_netdev_priv *np = netdev_priv(netdev); - u8 oem_ver, oem_patch, nvm_ver_hi, nvm_ver_lo; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; - u16 oem_build; + struct ice_orom_info *orom; + struct ice_nvm_info *nvm; + + nvm = &hw->nvm; + orom = &nvm->orom; strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); strscpy(drvinfo->version, ice_drv_ver, sizeof(drvinfo->version)); @@ -179,11 +182,9 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) /* Display NVM version (from which the firmware version can be * determined) which contains more pertinent information. */ - ice_get_nvm_version(hw, &oem_ver, &oem_build, &oem_patch, - &nvm_ver_hi, &nvm_ver_lo); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%x.%02x 0x%x %d.%d.%d", nvm_ver_hi, nvm_ver_lo, - hw->nvm.eetrack, oem_ver, oem_build, oem_patch); + "%x.%02x 0x%x %d.%d.%d", nvm->major_ver, nvm->minor_ver, + nvm->eetrack, orom->major, orom->build, orom->patch); strscpy(drvinfo->bus_info, pci_name(pf->pdev), sizeof(drvinfo->bus_info)); @@ -244,7 +245,7 @@ static int ice_get_eeprom_len(struct net_device *netdev) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_pf *pf = np->vsi->back; - return (int)(pf->hw.nvm.sr_words * sizeof(u16)); + return (int)pf->hw.nvm.flash_size; } static int @@ -252,39 +253,46 @@ ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, u8 *bytes) { struct ice_netdev_priv *np = netdev_priv(netdev); - u16 first_word, last_word, nwords; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; enum ice_status status; struct device *dev; int ret = 0; - u16 *buf; + u8 *buf; dev = ice_pf_to_dev(pf); eeprom->magic = hw->vendor_id | (hw->device_id << 16); + netdev_dbg(netdev, "GEEPROM cmd 0x%08x, offset 0x%08x, len 0x%08x\n", + eeprom->cmd, eeprom->offset, eeprom->len); - first_word = eeprom->offset >> 1; - last_word = (eeprom->offset + eeprom->len - 1) >> 1; - nwords = last_word - first_word + 1; - - buf = devm_kcalloc(dev, nwords, sizeof(u16), GFP_KERNEL); + buf = kzalloc(eeprom->len, GFP_KERNEL); if (!buf) return -ENOMEM; - status = ice_read_sr_buf(hw, first_word, &nwords, buf); + status = ice_acquire_nvm(hw, ICE_RES_READ); if (status) { - dev_err(dev, "ice_read_sr_buf failed, err %d aq_err %d\n", + dev_err(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", status, hw->adminq.sq_last_status); - eeprom->len = sizeof(u16) * nwords; ret = -EIO; goto out; } - memcpy(bytes, (u8 *)buf + (eeprom->offset & 1), eeprom->len); + status = ice_read_flat_nvm(hw, eeprom->offset, &eeprom->len, buf, + false); + if (status) { + dev_err(dev, "ice_read_flat_nvm failed, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + ret = -EIO; + goto release; + } + + memcpy(bytes, buf, eeprom->len); +release: + ice_release_nvm(hw); out: - devm_kfree(dev, buf); + kfree(buf); return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 89c090d32bb2..359ff8544773 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -10,6 +10,7 @@ #include "ice_lib.h" #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" +#include "ice_devlink.h" #define DRV_VERSION_MAJOR 0 #define DRV_VERSION_MINOR 8 @@ -2371,10 +2372,16 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) u8 mac_addr[ETH_ALEN]; int err; + err = ice_devlink_create_port(pf); + if (err) + return err; + netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, vsi->alloc_rxq); - if (!netdev) - return -ENOMEM; + if (!netdev) { + err = -ENOMEM; + goto err_destroy_devlink_port; + } vsi->netdev = netdev; np = netdev_priv(netdev); @@ -2404,7 +2411,9 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) err = register_netdev(vsi->netdev); if (err) - return err; + goto err_destroy_devlink_port; + + devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev); netif_carrier_off(vsi->netdev); @@ -2412,6 +2421,11 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) netif_tx_stop_all_queues(vsi->netdev); return 0; + +err_destroy_devlink_port: + ice_devlink_destroy_port(pf); + + return err; } /** @@ -3184,7 +3198,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) return err; } - pf = devm_kzalloc(dev, sizeof(*pf), GFP_KERNEL); + pf = ice_allocate_pf(dev); if (!pf) return -ENOMEM; @@ -3222,6 +3236,12 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M); + err = ice_devlink_register(pf); + if (err) { + dev_err(dev, "ice_devlink_register failed: %d\n", err); + goto err_exit_unroll; + } + #ifndef CONFIG_DYNAMIC_DEBUG if (debug < -1) hw->debug_mask = debug; @@ -3354,6 +3374,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) return 0; err_alloc_sw_unroll: + ice_devlink_destroy_port(pf); set_bit(__ICE_SERVICE_DIS, pf->state); set_bit(__ICE_DOWN, pf->state); devm_kfree(dev, pf->first_sw); @@ -3366,6 +3387,7 @@ err_init_pf_unroll: ice_deinit_pf(pf); ice_deinit_hw(hw); err_exit_unroll: + ice_devlink_unregister(pf); pci_disable_pcie_error_reporting(pdev); return err; } @@ -3396,6 +3418,7 @@ static void ice_remove(struct pci_dev *pdev) set_bit(__ICE_DOWN, pf->state); ice_service_task_stop(pf); + ice_devlink_destroy_port(pf); ice_vsi_release_all(pf); ice_free_irq_msix_misc(pf); ice_for_each_vsi(pf, i) { @@ -3405,6 +3428,8 @@ static void ice_remove(struct pci_dev *pdev) } ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); + ice_devlink_unregister(pf); + /* Issue a PFR as part of the prescribed driver unload flow. Do not * do it via ice_schedule_reset() since there is no need to rebuild * and the service task is already stopped. diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index f6e25db22c23..8beb675d676b 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -11,25 +11,29 @@ * @length: length of the section to be read (in bytes from the offset) * @data: command buffer (size [bytes] = length) * @last_command: tells if this is the last command in a series + * @read_shadow_ram: tell if this is a shadow RAM read * @cd: pointer to command details structure or NULL * * Read the NVM using the admin queue commands (0x0701) */ static enum ice_status ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length, - void *data, bool last_command, struct ice_sq_cd *cd) + void *data, bool last_command, bool read_shadow_ram, + struct ice_sq_cd *cd) { struct ice_aq_desc desc; struct ice_aqc_nvm *cmd; cmd = &desc.params.nvm; - /* In offset the highest byte must be zeroed. */ - if (offset & 0xFF000000) + if (offset > ICE_AQC_NVM_MAX_OFFSET) return ICE_ERR_PARAM; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_read); + if (!read_shadow_ram && module_typeid == ICE_AQC_NVM_START_POINT) + cmd->cmd_flags |= ICE_AQC_NVM_FLASH_ONLY; + /* If this is the last command in a series, set the proper flag. */ if (last_command) cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD; @@ -42,65 +46,64 @@ ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length, } /** - * ice_check_sr_access_params - verify params for Shadow RAM R/W operations. - * @hw: pointer to the HW structure - * @offset: offset in words from module start - * @words: number of words to access + * ice_read_flat_nvm - Read portion of NVM by flat offset + * @hw: pointer to the HW struct + * @offset: offset from beginning of NVM + * @length: (in) number of bytes to read; (out) number of bytes actually read + * @data: buffer to return data in (sized to fit the specified length) + * @read_shadow_ram: if true, read from shadow RAM instead of NVM + * + * Reads a portion of the NVM, as a flat memory space. This function correctly + * breaks read requests across Shadow RAM sectors and ensures that no single + * read request exceeds the maximum 4Kb read for a single AdminQ command. + * + * Returns a status code on failure. Note that the data pointer may be + * partially updated if some reads succeed before a failure. */ -static enum ice_status -ice_check_sr_access_params(struct ice_hw *hw, u32 offset, u16 words) +enum ice_status +ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data, + bool read_shadow_ram) { - if ((offset + words) > hw->nvm.sr_words) { - ice_debug(hw, ICE_DBG_NVM, - "NVM error: offset beyond SR lmt.\n"); - return ICE_ERR_PARAM; - } + enum ice_status status; + u32 inlen = *length; + u32 bytes_read = 0; + bool last_cmd; - if (words > ICE_SR_SECTOR_SIZE_IN_WORDS) { - /* We can access only up to 4KB (one sector), in one AQ write */ - ice_debug(hw, ICE_DBG_NVM, - "NVM error: tried to access %d words, limit is %d.\n", - words, ICE_SR_SECTOR_SIZE_IN_WORDS); - return ICE_ERR_PARAM; - } + *length = 0; - if (((offset + (words - 1)) / ICE_SR_SECTOR_SIZE_IN_WORDS) != - (offset / ICE_SR_SECTOR_SIZE_IN_WORDS)) { - /* A single access cannot spread over two sectors */ + /* Verify the length of the read if this is for the Shadow RAM */ + if (read_shadow_ram && ((offset + inlen) > (hw->nvm.sr_words * 2u))) { ice_debug(hw, ICE_DBG_NVM, - "NVM error: cannot spread over two sectors.\n"); + "NVM error: requested offset is beyond Shadow RAM limit\n"); return ICE_ERR_PARAM; } - return 0; -} + do { + u32 read_size, sector_offset; -/** - * ice_read_sr_aq - Read Shadow RAM. - * @hw: pointer to the HW structure - * @offset: offset in words from module start - * @words: number of words to read - * @data: buffer for words reads from Shadow RAM - * @last_command: tells the AdminQ that this is the last command - * - * Reads 16-bit word buffers from the Shadow RAM using the admin command. - */ -static enum ice_status -ice_read_sr_aq(struct ice_hw *hw, u32 offset, u16 words, u16 *data, - bool last_command) -{ - enum ice_status status; + /* ice_aq_read_nvm cannot read more than 4Kb at a time. + * Additionally, a read from the Shadow RAM may not cross over + * a sector boundary. Conveniently, the sector size is also + * 4Kb. + */ + sector_offset = offset % ICE_AQ_MAX_BUF_LEN; + read_size = min_t(u32, ICE_AQ_MAX_BUF_LEN - sector_offset, + inlen - bytes_read); - status = ice_check_sr_access_params(hw, offset, words); + last_cmd = !(bytes_read + read_size < inlen); - /* values in "offset" and "words" parameters are sized as words - * (16 bits) but ice_aq_read_nvm expects these values in bytes. - * So do this conversion while calling ice_aq_read_nvm. - */ - if (!status) - status = ice_aq_read_nvm(hw, 0, 2 * offset, 2 * words, data, - last_command, NULL); + status = ice_aq_read_nvm(hw, ICE_AQC_NVM_START_POINT, + offset, read_size, + data + bytes_read, last_cmd, + read_shadow_ram, NULL); + if (status) + break; + + bytes_read += read_size; + offset += read_size; + } while (!last_cmd); + *length = bytes_read; return status; } @@ -110,75 +113,25 @@ ice_read_sr_aq(struct ice_hw *hw, u32 offset, u16 words, u16 *data, * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) * @data: word read from the Shadow RAM * - * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_aq method. + * Reads one 16 bit word from the Shadow RAM using ice_read_flat_nvm. */ static enum ice_status ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data) { + u32 bytes = sizeof(u16); enum ice_status status; + __le16 data_local; - status = ice_read_sr_aq(hw, offset, 1, data, true); - if (!status) - *data = le16_to_cpu(*(__force __le16 *)data); - - return status; -} - -/** - * ice_read_sr_buf_aq - Reads Shadow RAM buf via AQ - * @hw: pointer to the HW structure - * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) - * @words: (in) number of words to read; (out) number of words actually read - * @data: words read from the Shadow RAM - * - * Reads 16 bit words (data buf) from the SR using the ice_read_sr_aq - * method. Ownership of the NVM is taken before reading the buffer and later - * released. - */ -static enum ice_status -ice_read_sr_buf_aq(struct ice_hw *hw, u16 offset, u16 *words, u16 *data) -{ - enum ice_status status; - bool last_cmd = false; - u16 words_read = 0; - u16 i = 0; - - do { - u16 read_size, off_w; - - /* Calculate number of bytes we should read in this step. - * It's not allowed to read more than one page at a time or - * to cross page boundaries. - */ - off_w = offset % ICE_SR_SECTOR_SIZE_IN_WORDS; - read_size = off_w ? - min_t(u16, *words, - (ICE_SR_SECTOR_SIZE_IN_WORDS - off_w)) : - min_t(u16, (*words - words_read), - ICE_SR_SECTOR_SIZE_IN_WORDS); - - /* Check if this is last command, if so set proper flag */ - if ((words_read + read_size) >= *words) - last_cmd = true; - - status = ice_read_sr_aq(hw, offset, read_size, - data + words_read, last_cmd); - if (status) - goto read_nvm_buf_aq_exit; - - /* Increment counter for words already read and move offset to - * new read location - */ - words_read += read_size; - offset += read_size; - } while (words_read < *words); - - for (i = 0; i < *words; i++) - data[i] = le16_to_cpu(((__force __le16 *)data)[i]); + /* Note that ice_read_flat_nvm takes into account the 4Kb AdminQ and + * Shadow RAM sector restrictions necessary when reading from the NVM. + */ + status = ice_read_flat_nvm(hw, offset * sizeof(u16), &bytes, + (u8 *)&data_local, true); + if (status) + return status; -read_nvm_buf_aq_exit: - *words = words_read; - return status; + *data = le16_to_cpu(data_local); + return 0; } /** @@ -188,7 +141,7 @@ read_nvm_buf_aq_exit: * * This function will request NVM ownership. */ -static enum ice_status +enum ice_status ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access) { if (hw->nvm.blank_nvm_mode) @@ -203,7 +156,7 @@ ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access) * * This function will release NVM ownership. */ -static void ice_release_nvm(struct ice_hw *hw) +void ice_release_nvm(struct ice_hw *hw) { if (hw->nvm.blank_nvm_mode) return; @@ -233,6 +186,239 @@ enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data) } /** + * ice_get_pfa_module_tlv - Reads sub module TLV from NVM PFA + * @hw: pointer to hardware structure + * @module_tlv: pointer to module TLV to return + * @module_tlv_len: pointer to module TLV length to return + * @module_type: module type requested + * + * Finds the requested sub module TLV type from the Preserved Field + * Area (PFA) and returns the TLV pointer and length. The caller can + * use these to read the variable length TLV value. + */ +enum ice_status +ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, + u16 module_type) +{ + enum ice_status status; + u16 pfa_len, pfa_ptr; + u16 next_tlv; + + status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Preserved Field Array pointer.\n"); + return status; + } + status = ice_read_sr_word(hw, pfa_ptr, &pfa_len); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n"); + return status; + } + /* Starting with first TLV after PFA length, iterate through the list + * of TLVs to find the requested one. + */ + next_tlv = pfa_ptr + 1; + while (next_tlv < pfa_ptr + pfa_len) { + u16 tlv_sub_module_type; + u16 tlv_len; + + /* Read TLV type */ + status = ice_read_sr_word(hw, next_tlv, &tlv_sub_module_type); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV type.\n"); + break; + } + /* Read TLV length */ + status = ice_read_sr_word(hw, next_tlv + 1, &tlv_len); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV length.\n"); + break; + } + if (tlv_sub_module_type == module_type) { + if (tlv_len) { + *module_tlv = next_tlv; + *module_tlv_len = tlv_len; + return 0; + } + return ICE_ERR_INVAL_SIZE; + } + /* Check next TLV, i.e. current TLV pointer + length + 2 words + * (for current TLV's type and length) + */ + next_tlv = next_tlv + tlv_len + 2; + } + /* Module does not exist */ + return ICE_ERR_DOES_NOT_EXIST; +} + +/** + * ice_read_pba_string - Reads part number string from NVM + * @hw: pointer to hardware structure + * @pba_num: stores the part number string from the NVM + * @pba_num_size: part number string buffer length + * + * Reads the part number string from the NVM. + */ +enum ice_status +ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size) +{ + u16 pba_tlv, pba_tlv_len; + enum ice_status status; + u16 pba_word, pba_size; + u16 i; + + status = ice_get_pfa_module_tlv(hw, &pba_tlv, &pba_tlv_len, + ICE_SR_PBA_BLOCK_PTR); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Block TLV.\n"); + return status; + } + + /* pba_size is the next word */ + status = ice_read_sr_word(hw, (pba_tlv + 2), &pba_size); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Section size.\n"); + return status; + } + + if (pba_tlv_len < pba_size) { + ice_debug(hw, ICE_DBG_INIT, "Invalid PBA Block TLV size.\n"); + return ICE_ERR_INVAL_SIZE; + } + + /* Subtract one to get PBA word count (PBA Size word is included in + * total size) + */ + pba_size--; + if (pba_num_size < (((u32)pba_size * 2) + 1)) { + ice_debug(hw, ICE_DBG_INIT, "Buffer too small for PBA data.\n"); + return ICE_ERR_PARAM; + } + + for (i = 0; i < pba_size; i++) { + status = ice_read_sr_word(hw, (pba_tlv + 2 + 1) + i, &pba_word); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Block word %d.\n", i); + return status; + } + + pba_num[(i * 2)] = (pba_word >> 8) & 0xFF; + pba_num[(i * 2) + 1] = pba_word & 0xFF; + } + pba_num[(pba_size * 2)] = '\0'; + + return status; +} + +/** + * ice_get_orom_ver_info - Read Option ROM version information + * @hw: pointer to the HW struct + * + * Read the Combo Image version data from the Boot Configuration TLV and fill + * in the option ROM version data. + */ +static enum ice_status ice_get_orom_ver_info(struct ice_hw *hw) +{ + u16 combo_hi, combo_lo, boot_cfg_tlv, boot_cfg_tlv_len; + struct ice_orom_info *orom = &hw->nvm.orom; + enum ice_status status; + u32 combo_ver; + + status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len, + ICE_SR_BOOT_CFG_PTR); + if (status) { + ice_debug(hw, ICE_DBG_INIT, + "Failed to read Boot Configuration Block TLV.\n"); + return status; + } + + /* Boot Configuration Block must have length at least 2 words + * (Combo Image Version High and Combo Image Version Low) + */ + if (boot_cfg_tlv_len < 2) { + ice_debug(hw, ICE_DBG_INIT, + "Invalid Boot Configuration Block TLV size.\n"); + return ICE_ERR_INVAL_SIZE; + } + + status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OROM_VER_OFF), + &combo_hi); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read OROM_VER hi.\n"); + return status; + } + + status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OROM_VER_OFF + 1), + &combo_lo); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read OROM_VER lo.\n"); + return status; + } + + combo_ver = ((u32)combo_hi << 16) | combo_lo; + + orom->major = (u8)((combo_ver & ICE_OROM_VER_MASK) >> + ICE_OROM_VER_SHIFT); + orom->patch = (u8)(combo_ver & ICE_OROM_VER_PATCH_MASK); + orom->build = (u16)((combo_ver & ICE_OROM_VER_BUILD_MASK) >> + ICE_OROM_VER_BUILD_SHIFT); + + return 0; +} + +/** + * ice_discover_flash_size - Discover the available flash size. + * @hw: pointer to the HW struct + * + * The device flash could be up to 16MB in size. However, it is possible that + * the actual size is smaller. Use bisection to determine the accessible size + * of flash memory. + */ +static enum ice_status ice_discover_flash_size(struct ice_hw *hw) +{ + u32 min_size = 0, max_size = ICE_AQC_NVM_MAX_OFFSET + 1; + enum ice_status status; + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) + return status; + + while ((max_size - min_size) > 1) { + u32 offset = (max_size + min_size) / 2; + u32 len = 1; + u8 data; + + status = ice_read_flat_nvm(hw, offset, &len, &data, false); + if (status == ICE_ERR_AQ_ERROR && + hw->adminq.sq_last_status == ICE_AQ_RC_EINVAL) { + ice_debug(hw, ICE_DBG_NVM, + "%s: New upper bound of %u bytes\n", + __func__, offset); + status = 0; + max_size = offset; + } else if (!status) { + ice_debug(hw, ICE_DBG_NVM, + "%s: New lower bound of %u bytes\n", + __func__, offset); + min_size = offset; + } else { + /* an unexpected error occurred */ + goto err_read_flat_nvm; + } + } + + ice_debug(hw, ICE_DBG_NVM, + "Predicted flash size is %u bytes\n", max_size); + + hw->nvm.flash_size = max_size; + +err_read_flat_nvm: + ice_release_nvm(hw); + + return status; +} + +/** * ice_init_nvm - initializes NVM setting * @hw: pointer to the HW struct * @@ -241,9 +427,8 @@ enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data) */ enum ice_status ice_init_nvm(struct ice_hw *hw) { - u16 oem_hi, oem_lo, boot_cfg_tlv, boot_cfg_tlv_len; struct ice_nvm_info *nvm = &hw->nvm; - u16 eetrack_lo, eetrack_hi; + u16 eetrack_lo, eetrack_hi, ver; enum ice_status status; u32 fla, gens_stat; u8 sr_size; @@ -269,12 +454,14 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) return ICE_ERR_NVM_BLANK_MODE; } - status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &nvm->ver); + status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &ver); if (status) { ice_debug(hw, ICE_DBG_INIT, "Failed to read DEV starter version.\n"); return status; } + nvm->major_ver = (ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT; + nvm->minor_ver = (ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT; status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_LO, &eetrack_lo); if (status) { @@ -289,6 +476,13 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) nvm->eetrack = (eetrack_hi << 16) | eetrack_lo; + status = ice_discover_flash_size(hw); + if (status) { + ice_debug(hw, ICE_DBG_NVM, + "NVM init error: failed to discover flash size.\n"); + return status; + } + switch (hw->device_id) { /* the following devices do not have boot_cfg_tlv yet */ case ICE_DEV_ID_E823C_BACKPLANE: @@ -315,68 +509,16 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) break; } - status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len, - ICE_SR_BOOT_CFG_PTR); + status = ice_get_orom_ver_info(hw); if (status) { - ice_debug(hw, ICE_DBG_INIT, - "Failed to read Boot Configuration Block TLV.\n"); + ice_debug(hw, ICE_DBG_INIT, "Failed to read Option ROM info.\n"); return status; } - /* Boot Configuration Block must have length at least 2 words - * (Combo Image Version High and Combo Image Version Low) - */ - if (boot_cfg_tlv_len < 2) { - ice_debug(hw, ICE_DBG_INIT, - "Invalid Boot Configuration Block TLV size.\n"); - return ICE_ERR_INVAL_SIZE; - } - - status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF), - &oem_hi); - if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER hi.\n"); - return status; - } - - status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF + 1), - &oem_lo); - if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER lo.\n"); - return status; - } - - nvm->oem_ver = ((u32)oem_hi << 16) | oem_lo; - return 0; } /** - * ice_read_sr_buf - Reads Shadow RAM buf and acquire lock if necessary - * @hw: pointer to the HW structure - * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) - * @words: (in) number of words to read; (out) number of words actually read - * @data: words read from the Shadow RAM - * - * Reads 16 bit words (data buf) from the SR using the ice_read_nvm_buf_aq - * method. The buf read is preceded by the NVM ownership take - * and followed by the release. - */ -enum ice_status -ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data) -{ - enum ice_status status; - - status = ice_acquire_nvm(hw, ICE_RES_READ); - if (!status) { - status = ice_read_sr_buf_aq(hw, offset, words, data); - ice_release_nvm(hw); - } - - return status; -} - -/** * ice_nvm_validate_checksum * @hw: pointer to the HW struct * diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h index a9fa011c22c6..999f273ba6ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.h +++ b/drivers/net/ethernet/intel/ice/ice_nvm.h @@ -4,5 +4,17 @@ #ifndef _ICE_NVM_H_ #define _ICE_NVM_H_ +enum ice_status +ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access); +void ice_release_nvm(struct ice_hw *hw); +enum ice_status +ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data, + bool read_shadow_ram); +enum ice_status +ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, + u16 module_type); +enum ice_status +ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size); +enum ice_status ice_init_nvm(struct ice_hw *hw); enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data); #endif /* _ICE_NVM_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index db0ef6ba907f..4ce5f92fca4a 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -239,12 +239,21 @@ struct ice_fc_info { enum ice_fc_mode req_mode; /* FC mode requested by caller */ }; +/* Option ROM version information */ +struct ice_orom_info { + u8 major; /* Major version of OROM */ + u8 patch; /* Patch version of OROM */ + u16 build; /* Build version of OROM */ +}; + /* NVM Information */ struct ice_nvm_info { - u32 eetrack; /* NVM data version */ - u32 oem_ver; /* OEM version info */ - u16 sr_words; /* Shadow RAM size in words */ - u16 ver; /* NVM package version */ + struct ice_orom_info orom; /* Option ROM version info */ + u32 eetrack; /* NVM data version */ + u16 sr_words; /* Shadow RAM size in words */ + u32 flash_size; /* Size of available flash in bytes */ + u8 major_ver; /* major version of NVM package */ + u8 minor_ver; /* minor version of dev starter */ u8 blank_nvm_mode; /* is NVM empty (no FW present) */ }; @@ -626,7 +635,8 @@ struct ice_hw_port_stats { /* Checksum and Shadow RAM pointers */ #define ICE_SR_BOOT_CFG_PTR 0x132 -#define ICE_NVM_OEM_VER_OFF 0x02 +#define ICE_NVM_OROM_VER_OFF 0x02 +#define ICE_SR_PBA_BLOCK_PTR 0x16 #define ICE_SR_NVM_DEV_STARTER_VER 0x18 #define ICE_SR_NVM_EETRACK_LO 0x2D #define ICE_SR_NVM_EETRACK_HI 0x2E @@ -634,12 +644,12 @@ struct ice_hw_port_stats { #define ICE_NVM_VER_LO_MASK (0xff << ICE_NVM_VER_LO_SHIFT) #define ICE_NVM_VER_HI_SHIFT 12 #define ICE_NVM_VER_HI_MASK (0xf << ICE_NVM_VER_HI_SHIFT) -#define ICE_OEM_VER_PATCH_SHIFT 0 -#define ICE_OEM_VER_PATCH_MASK (0xff << ICE_OEM_VER_PATCH_SHIFT) -#define ICE_OEM_VER_BUILD_SHIFT 8 -#define ICE_OEM_VER_BUILD_MASK (0xffff << ICE_OEM_VER_BUILD_SHIFT) -#define ICE_OEM_VER_SHIFT 24 -#define ICE_OEM_VER_MASK (0xff << ICE_OEM_VER_SHIFT) +#define ICE_OROM_VER_PATCH_SHIFT 0 +#define ICE_OROM_VER_PATCH_MASK (0xff << ICE_OROM_VER_PATCH_SHIFT) +#define ICE_OROM_VER_BUILD_SHIFT 8 +#define ICE_OROM_VER_BUILD_MASK (0xffff << ICE_OROM_VER_BUILD_SHIFT) +#define ICE_OROM_VER_SHIFT 24 +#define ICE_OROM_VER_MASK (0xff << ICE_OROM_VER_SHIFT) #define ICE_SR_PFA_PTR 0x40 #define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800 #define ICE_SR_WORDS_IN_1KB 512 diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index f96ffa83efbe..39d3b76a6f5d 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2183,27 +2183,6 @@ static int igb_set_coalesce(struct net_device *netdev, struct igb_adapter *adapter = netdev_priv(netdev); int i; - if (ec->rx_max_coalesced_frames || - ec->rx_coalesce_usecs_irq || - ec->rx_max_coalesced_frames_irq || - ec->tx_max_coalesced_frames || - ec->tx_coalesce_usecs_irq || - ec->stats_block_coalesce_usecs || - ec->use_adaptive_rx_coalesce || - ec->use_adaptive_tx_coalesce || - ec->pkt_rate_low || - ec->rx_coalesce_usecs_low || - ec->rx_max_coalesced_frames_low || - ec->tx_coalesce_usecs_low || - ec->tx_max_coalesced_frames_low || - ec->pkt_rate_high || - ec->rx_coalesce_usecs_high || - ec->rx_max_coalesced_frames_high || - ec->tx_coalesce_usecs_high || - ec->tx_max_coalesced_frames_high || - ec->rate_sample_interval) - return -ENOTSUPP; - if ((ec->rx_coalesce_usecs > IGB_MAX_ITR_USECS) || ((ec->rx_coalesce_usecs > 3) && (ec->rx_coalesce_usecs < IGB_MIN_ITR_USECS)) || @@ -3477,6 +3456,7 @@ static int igb_set_priv_flags(struct net_device *netdev, u32 priv_flags) } static const struct ethtool_ops igb_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = igb_get_drvinfo, .get_regs_len = igb_get_regs_len, .get_regs = igb_get_regs, diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c index 3ae358b35227..9217d150e286 100644 --- a/drivers/net/ethernet/intel/igbvf/ethtool.c +++ b/drivers/net/ethernet/intel/igbvf/ethtool.c @@ -424,6 +424,7 @@ static void igbvf_get_strings(struct net_device *netdev, u32 stringset, } static const struct ethtool_ops igbvf_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = igbvf_get_drvinfo, .get_regs_len = igbvf_get_regs_len, .get_regs = igbvf_get_regs, diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 69f50b8e2af3..f530fc29b074 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -861,27 +861,6 @@ static int igc_set_coalesce(struct net_device *netdev, struct igc_adapter *adapter = netdev_priv(netdev); int i; - if (ec->rx_max_coalesced_frames || - ec->rx_coalesce_usecs_irq || - ec->rx_max_coalesced_frames_irq || - ec->tx_max_coalesced_frames || - ec->tx_coalesce_usecs_irq || - ec->stats_block_coalesce_usecs || - ec->use_adaptive_rx_coalesce || - ec->use_adaptive_tx_coalesce || - ec->pkt_rate_low || - ec->rx_coalesce_usecs_low || - ec->rx_max_coalesced_frames_low || - ec->tx_coalesce_usecs_low || - ec->tx_max_coalesced_frames_low || - ec->pkt_rate_high || - ec->rx_coalesce_usecs_high || - ec->rx_max_coalesced_frames_high || - ec->tx_coalesce_usecs_high || - ec->tx_max_coalesced_frames_high || - ec->rate_sample_interval) - return -ENOTSUPP; - if (ec->rx_coalesce_usecs > IGC_MAX_ITR_USECS || (ec->rx_coalesce_usecs > 3 && ec->rx_coalesce_usecs < IGC_MIN_ITR_USECS) || @@ -1915,6 +1894,7 @@ static int igc_set_link_ksettings(struct net_device *netdev, } static const struct ethtool_ops igc_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = igc_get_drvinfo, .get_regs_len = igc_get_regs_len, .get_regs = igc_get_regs, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 7c52ae8ac005..c6bf0a50ee63 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3444,6 +3444,7 @@ static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags) } static const struct ethtool_ops ixgbe_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = ixgbe_get_drvinfo, .get_regs_len = ixgbe_get_regs_len, .get_regs = ixgbe_get_regs, diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index f7f309c96fa8..988fa49fa99a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -968,6 +968,7 @@ static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags) } static const struct ethtool_ops ixgbevf_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = ixgbevf_get_drvinfo, .get_regs_len = ixgbevf_get_regs_len, .get_regs = ixgbevf_get_regs, diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index de3c7ce9353c..c97c74164c73 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2839,6 +2839,9 @@ jme_set_eeprom(struct net_device *netdev, } static const struct ethtool_ops jme_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = jme_get_drvinfo, .get_regs_len = jme_get_regs_len, .get_regs = jme_get_regs, diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 3c8125cbc84d..81d24481b22c 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -1737,6 +1737,7 @@ static int mv643xx_eth_get_sset_count(struct net_device *dev, int sset) } static const struct ethtool_ops mv643xx_eth_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = mv643xx_eth_get_drvinfo, .nway_reset = phy_ethtool_nway_reset, .get_link = ethtool_op_get_link, diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 0b9e851f3da4..d2e2dc538428 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -347,7 +347,7 @@ static int orion_mdio_probe(struct platform_device *pdev) } - dev->err_interrupt = platform_get_irq(pdev, 0); + dev->err_interrupt = platform_get_irq_optional(pdev, 0); if (dev->err_interrupt > 0 && resource_size(r) < MVMDIO_ERR_INT_MASK + 4) { dev_err(&pdev->dev, @@ -364,8 +364,8 @@ static int orion_mdio_probe(struct platform_device *pdev) writel(MVMDIO_ERR_INT_SMI_DONE, dev->regs + MVMDIO_ERR_INT_MASK); - } else if (dev->err_interrupt == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; + } else if (dev->err_interrupt < 0) { + ret = dev->err_interrupt; goto out_mdio; } diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index bc488e8b8e45..16f8b1f7b04f 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4838,6 +4838,8 @@ static const struct net_device_ops mvneta_netdev_ops = { }; static const struct ethtool_ops mvneta_eth_tool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .nway_reset = mvneta_ethtool_nway_reset, .get_link = ethtool_op_get_link, .set_coalesce = mvneta_ethtool_set_coalesce, diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index 0a0c6ec2336c..8972cdd559e8 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -1082,7 +1082,7 @@ static int mvpp2_port_c2_tcam_rule_add(struct mvpp2_port *port, u8 qh, ql, pmap; int index, ctx; - if (!flow_action_basic_hw_stats_types_check(&rule->flow->action, NULL)) + if (!flow_action_basic_hw_stats_check(&rule->flow->action, NULL)) return -EOPNOTSUPP; memset(&c2, 0, sizeof(c2)); @@ -1308,7 +1308,7 @@ static int mvpp2_cls_rfs_parse_rule(struct mvpp2_rfs_rule *rule) struct flow_rule *flow = rule->flow; struct flow_action_entry *act; - if (!flow_action_basic_hw_stats_types_check(&rule->flow->action, NULL)) + if (!flow_action_basic_hw_stats_check(&rule->flow->action, NULL)) return -EOPNOTSUPP; act = &flow->action.entries[0]; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 6b9c7ed2547e..1fa60e985b43 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4384,6 +4384,8 @@ static const struct net_device_ops mvpp2_netdev_ops = { }; static const struct ethtool_ops mvpp2_eth_tool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .nway_reset = mvpp2_ethtool_nway_reset, .get_link = ethtool_op_get_link, .set_coalesce = mvpp2_ethtool_set_coalesce, diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index ced514c05c97..d9dfb614daa6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -33,3 +33,9 @@ config OCTEONTX2_PF depends on PCI help This driver supports Marvell's OcteonTX2 NIC physical function. + +config OCTEONTX2_VF + tristate "Marvell OcteonTX2 NIC Virtual Function driver" + depends on OCTEONTX2_PF + help + This driver supports Marvell's OcteonTX2 NIC virtual function. diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 5ff25bf8419e..557e4292c846 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -21,7 +21,6 @@ #define DRV_NAME "octeontx2-af" #define DRV_STRING "Marvell OcteonTX2 RVU Admin Function Driver" -#define DRV_VERSION "1.0" static int rvu_get_hwvf(struct rvu *rvu, int pcifunc); @@ -46,10 +45,9 @@ static const struct pci_device_id rvu_id_table[] = { { 0, } /* end of table */ }; -MODULE_AUTHOR("Marvell International Ltd."); +MODULE_AUTHOR("Sunil Goutham <sgoutham@marvell.com>"); MODULE_DESCRIPTION(DRV_STRING); MODULE_LICENSE("GPL v2"); -MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, rvu_id_table); static char *mkex_profile; /* MKEX profile name */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 41bf00cf5b1d..778df331c8ac 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -4,7 +4,9 @@ # obj-$(CONFIG_OCTEONTX2_PF) += octeontx2_nicpf.o +obj-$(CONFIG_OCTEONTX2_VF) += octeontx2_nicvf.o octeontx2_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o +octeontx2_nicvf-y := otx2_vf.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index af4437d4dfcb..f1d2dea90a8c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -49,15 +49,15 @@ void otx2_update_lmac_stats(struct otx2_nic *pfvf) if (!netif_running(pfvf->netdev)) return; - otx2_mbox_lock(&pfvf->mbox); + mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_cgx_stats(&pfvf->mbox); if (!req) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return; } otx2_sync_mbox_msg(&pfvf->mbox); - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); } int otx2_update_rq_stats(struct otx2_nic *pfvf, int qidx) @@ -128,6 +128,7 @@ void otx2_get_stats64(struct net_device *netdev, stats->tx_packets = dev_stats->tx_frames; stats->tx_dropped = dev_stats->tx_drops; } +EXPORT_SYMBOL(otx2_get_stats64); /* Sync MAC address with RVU AF */ static int otx2_hw_set_mac_addr(struct otx2_nic *pfvf, u8 *mac) @@ -135,17 +136,17 @@ static int otx2_hw_set_mac_addr(struct otx2_nic *pfvf, u8 *mac) struct nix_set_mac_addr *req; int err; - otx2_mbox_lock(&pfvf->mbox); + mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_nix_set_mac_addr(&pfvf->mbox); if (!req) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return -ENOMEM; } ether_addr_copy(req->mac_addr, mac); err = otx2_sync_mbox_msg(&pfvf->mbox); - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return err; } @@ -157,27 +158,27 @@ static int otx2_hw_get_mac_addr(struct otx2_nic *pfvf, struct msg_req *req; int err; - otx2_mbox_lock(&pfvf->mbox); + mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_nix_get_mac_addr(&pfvf->mbox); if (!req) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return -ENOMEM; } err = otx2_sync_mbox_msg(&pfvf->mbox); if (err) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return err; } msghdr = otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr); if (IS_ERR(msghdr)) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return PTR_ERR(msghdr); } rsp = (struct nix_get_mac_addr_rsp *)msghdr; ether_addr_copy(netdev->dev_addr, rsp->mac_addr); - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return 0; } @@ -197,26 +198,25 @@ int otx2_set_mac_address(struct net_device *netdev, void *p) return 0; } +EXPORT_SYMBOL(otx2_set_mac_address); int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu) { struct nix_frs_cfg *req; int err; - otx2_mbox_lock(&pfvf->mbox); + mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_nix_set_hw_frs(&pfvf->mbox); if (!req) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return -ENOMEM; } - /* SMQ config limits maximum pkt size that can be transmitted */ - req->update_smq = true; pfvf->max_frs = mtu + OTX2_ETH_HLEN; req->maxlen = pfvf->max_frs; err = otx2_sync_mbox_msg(&pfvf->mbox); - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return err; } @@ -225,17 +225,23 @@ int otx2_config_pause_frm(struct otx2_nic *pfvf) struct cgx_pause_frm_cfg *req; int err; - otx2_mbox_lock(&pfvf->mbox); + if (is_otx2_lbkvf(pfvf->pdev)) + return 0; + + mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_cgx_cfg_pause_frm(&pfvf->mbox); - if (!req) - return -ENOMEM; + if (!req) { + err = -ENOMEM; + goto unlock; + } req->rx_pause = !!(pfvf->flags & OTX2_FLAG_RX_PAUSE_ENABLED); req->tx_pause = !!(pfvf->flags & OTX2_FLAG_TX_PAUSE_ENABLED); req->set = 1; err = otx2_sync_mbox_msg(&pfvf->mbox); - otx2_mbox_unlock(&pfvf->mbox); +unlock: + mutex_unlock(&pfvf->mbox.lock); return err; } @@ -245,10 +251,10 @@ int otx2_set_flowkey_cfg(struct otx2_nic *pfvf) struct nix_rss_flowkey_cfg *req; int err; - otx2_mbox_lock(&pfvf->mbox); + mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_nix_rss_flowkey_cfg(&pfvf->mbox); if (!req) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return -ENOMEM; } req->mcam_index = -1; /* Default or reserved index */ @@ -256,7 +262,7 @@ int otx2_set_flowkey_cfg(struct otx2_nic *pfvf) req->group = DEFAULT_RSS_CONTEXT_GROUP; err = otx2_sync_mbox_msg(&pfvf->mbox); - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return err; } @@ -267,7 +273,7 @@ int otx2_set_rss_table(struct otx2_nic *pfvf) struct nix_aq_enq_req *aq; int idx, err; - otx2_mbox_lock(mbox); + mutex_lock(&mbox->lock); /* Get memory to put this msg */ for (idx = 0; idx < rss->rss_size; idx++) { aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox); @@ -277,12 +283,12 @@ int otx2_set_rss_table(struct otx2_nic *pfvf) */ err = otx2_sync_mbox_msg(mbox); if (err) { - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); return err; } aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox); if (!aq) { - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); return -ENOMEM; } } @@ -295,7 +301,7 @@ int otx2_set_rss_table(struct otx2_nic *pfvf) aq->op = NIX_AQ_INSTOP_INIT; } err = otx2_sync_mbox_msg(mbox); - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); return err; } @@ -413,6 +419,7 @@ void otx2_tx_timeout(struct net_device *netdev, unsigned int txq) schedule_work(&pfvf->reset_task); } +EXPORT_SYMBOL(otx2_tx_timeout); void otx2_get_mac_from_af(struct net_device *netdev) { @@ -427,6 +434,7 @@ void otx2_get_mac_from_af(struct net_device *netdev) if (!is_valid_ether_addr(netdev->dev_addr)) eth_hw_addr_random(netdev); } +EXPORT_SYMBOL(otx2_get_mac_from_af); static int otx2_get_link(struct otx2_nic *pfvf) { @@ -462,7 +470,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) /* Set topology e.t.c configuration */ if (lvl == NIX_TXSCH_LVL_SMQ) { req->reg[0] = NIX_AF_SMQX_CFG(schq); - req->regval[0] = ((pfvf->netdev->mtu + OTX2_ETH_HLEN) << 8) | + req->regval[0] = ((OTX2_MAX_MTU + OTX2_ETH_HLEN) << 8) | OTX2_MIN_MTU; req->regval[0] |= (0x20ULL << 51) | (0x80ULL << 39) | @@ -548,17 +556,17 @@ int otx2_txschq_stop(struct otx2_nic *pfvf) struct nix_txsch_free_req *free_req; int lvl, schq, err; - otx2_mbox_lock(&pfvf->mbox); + mutex_lock(&pfvf->mbox.lock); /* Free the transmit schedulers */ free_req = otx2_mbox_alloc_msg_nix_txsch_free(&pfvf->mbox); if (!free_req) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return -ENOMEM; } free_req->flags = TXSCHQ_FREE_ALL; err = otx2_sync_mbox_msg(&pfvf->mbox); - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); /* Clear the txschq list */ for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { @@ -572,17 +580,19 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) { int qidx, sqe_tail, sqe_head; u64 incr, *ptr, val; + int timeout = 1000; ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) { incr = (u64)qidx << 32; - while (1) { + while (timeout) { val = otx2_atomic64_add(incr, ptr); sqe_head = (val >> 20) & 0x3F; sqe_tail = (val >> 28) & 0x3F; if (sqe_head == sqe_tail) break; usleep_range(1, 3); + timeout--; } } } @@ -978,6 +988,7 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf) qmem_free(pfvf->dev, pool->fc_addr); } devm_kfree(pfvf->dev, pfvf->qset.pool); + pfvf->qset.pool = NULL; } static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, @@ -1245,10 +1256,10 @@ int otx2_detach_resources(struct mbox *mbox) { struct rsrc_detach *detach; - otx2_mbox_lock(mbox); + mutex_lock(&mbox->lock); detach = otx2_mbox_alloc_msg_detach_resources(mbox); if (!detach) { - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); return -ENOMEM; } @@ -1257,9 +1268,10 @@ int otx2_detach_resources(struct mbox *mbox) /* Send detach request to AF */ otx2_mbox_msg_send(&mbox->mbox, 0); - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); return 0; } +EXPORT_SYMBOL(otx2_detach_resources); int otx2_attach_npa_nix(struct otx2_nic *pfvf) { @@ -1267,11 +1279,11 @@ int otx2_attach_npa_nix(struct otx2_nic *pfvf) struct msg_req *msix; int err; - otx2_mbox_lock(&pfvf->mbox); + mutex_lock(&pfvf->mbox.lock); /* Get memory to put this msg */ attach = otx2_mbox_alloc_msg_attach_resources(&pfvf->mbox); if (!attach) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return -ENOMEM; } @@ -1281,7 +1293,7 @@ int otx2_attach_npa_nix(struct otx2_nic *pfvf) /* Send attach request to AF */ err = otx2_sync_mbox_msg(&pfvf->mbox); if (err) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return err; } @@ -1296,16 +1308,16 @@ int otx2_attach_npa_nix(struct otx2_nic *pfvf) /* Get NPA and NIX MSIX vector offsets */ msix = otx2_mbox_alloc_msg_msix_offset(&pfvf->mbox); if (!msix) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return -ENOMEM; } err = otx2_sync_mbox_msg(&pfvf->mbox); if (err) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return err; } - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); if (pfvf->hw.npa_msixoff == MSIX_VECTOR_INVALID || pfvf->hw.nix_msixoff == MSIX_VECTOR_INVALID) { @@ -1316,12 +1328,13 @@ int otx2_attach_npa_nix(struct otx2_nic *pfvf) return 0; } +EXPORT_SYMBOL(otx2_attach_npa_nix); void otx2_ctx_disable(struct mbox *mbox, int type, bool npa) { struct hwctx_disable_req *req; - otx2_mbox_lock(mbox); + mutex_lock(&mbox->lock); /* Request AQ to disable this context */ if (npa) req = otx2_mbox_alloc_msg_npa_hwctx_disable(mbox); @@ -1329,7 +1342,7 @@ void otx2_ctx_disable(struct mbox *mbox, int type, bool npa) req = otx2_mbox_alloc_msg_nix_hwctx_disable(mbox); if (!req) { - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); return; } @@ -1339,7 +1352,7 @@ void otx2_ctx_disable(struct mbox *mbox, int type, bool npa) dev_err(mbox->pfvf->dev, "%s failed to disable context\n", __func__); - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); } int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable) @@ -1384,6 +1397,7 @@ void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf, pf->hw.txschq_list[lvl][schq] = rsp->schq_list[lvl][schq]; } +EXPORT_SYMBOL(mbox_handler_nix_txsch_alloc); void mbox_handler_npa_lf_alloc(struct otx2_nic *pfvf, struct npa_lf_alloc_rsp *rsp) @@ -1391,6 +1405,7 @@ void mbox_handler_npa_lf_alloc(struct otx2_nic *pfvf, pfvf->hw.stack_pg_ptrs = rsp->stack_pg_ptrs; pfvf->hw.stack_pg_bytes = rsp->stack_pg_bytes; } +EXPORT_SYMBOL(mbox_handler_npa_lf_alloc); void mbox_handler_nix_lf_alloc(struct otx2_nic *pfvf, struct nix_lf_alloc_rsp *rsp) @@ -1401,6 +1416,7 @@ void mbox_handler_nix_lf_alloc(struct otx2_nic *pfvf, pfvf->hw.lso_tsov4_idx = rsp->lso_tsov4_idx; pfvf->hw.lso_tsov6_idx = rsp->lso_tsov6_idx; } +EXPORT_SYMBOL(mbox_handler_nix_lf_alloc); void mbox_handler_msix_offset(struct otx2_nic *pfvf, struct msix_offset_rsp *rsp) @@ -1408,6 +1424,7 @@ void mbox_handler_msix_offset(struct otx2_nic *pfvf, pfvf->hw.npa_msixoff = rsp->npa_msixoff; pfvf->hw.nix_msixoff = rsp->nix_msixoff; } +EXPORT_SYMBOL(mbox_handler_msix_offset); void mbox_handler_nix_bp_enable(struct otx2_nic *pfvf, struct nix_bp_cfg_rsp *rsp) @@ -1419,6 +1436,7 @@ void mbox_handler_nix_bp_enable(struct otx2_nic *pfvf, pfvf->bpid[chan_id] = rsp->chan_bpid[chan] & 0x3FF; } } +EXPORT_SYMBOL(mbox_handler_nix_bp_enable); void otx2_free_cints(struct otx2_nic *pfvf, int n) { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 885c3dcd4ac7..eaff5f6bf856 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -20,6 +20,8 @@ /* PCI device IDs */ #define PCI_DEVID_OCTEONTX2_RVU_PF 0xA063 +#define PCI_DEVID_OCTEONTX2_RVU_VF 0xA064 +#define PCI_DEVID_OCTEONTX2_RVU_AFVF 0xA0F8 #define PCI_SUBSYS_DEVID_96XX_RVU_PFVF 0xB200 @@ -191,6 +193,17 @@ struct otx2_hw { u64 cgx_tx_stats[CGX_TX_STATS_COUNT]; }; +struct otx2_vf_config { + struct otx2_nic *pf; + struct delayed_work link_event_work; + bool intf_down; /* interface was either configured or not */ +}; + +struct flr_work { + struct work_struct work; + struct otx2_nic *pf; +}; + struct refill_work { struct delayed_work pool_refill_work; struct otx2_nic *pf; @@ -215,14 +228,20 @@ struct otx2_nic { /* Mbox */ struct mbox mbox; + struct mbox *mbox_pfvf; struct workqueue_struct *mbox_wq; + struct workqueue_struct *mbox_pfvf_wq; + u8 total_vfs; u16 pcifunc; /* RVU PF_FUNC */ u16 bpid[NIX_MAX_BPID_CHAN]; + struct otx2_vf_config *vf_configs; struct cgx_link_user_info linfo; u64 reset_count; struct work_struct reset_task; + struct workqueue_struct *flr_wq; + struct flr_work *flr_wrk; struct refill_work *refill_wrk; /* Ethtool stuff */ @@ -232,6 +251,11 @@ struct otx2_nic { int nix_blkaddr; }; +static inline bool is_otx2_lbkvf(struct pci_dev *pdev) +{ + return pdev->device == PCI_DEVID_OCTEONTX2_RVU_AFVF; +} + static inline bool is_96xx_A0(struct pci_dev *pdev) { return (pdev->revision == 0x00) && @@ -351,21 +375,6 @@ static inline void otx2_sync_mbox_bbuf(struct otx2_mbox *mbox, int devid) hw_mbase + mbox->rx_start, msg_size + msgs_offset); } -static inline void otx2_mbox_lock_init(struct mbox *mbox) -{ - mutex_init(&mbox->lock); -} - -static inline void otx2_mbox_lock(struct mbox *mbox) -{ - mutex_lock(&mbox->lock); -} - -static inline void otx2_mbox_unlock(struct mbox *mbox) -{ - mutex_unlock(&mbox->lock); -} - /* With the absence of API for 128-bit IO memory access for arm64, * implement required operations at place. */ @@ -614,6 +623,7 @@ void otx2_update_lmac_stats(struct otx2_nic *pfvf); int otx2_update_rq_stats(struct otx2_nic *pfvf, int qidx); int otx2_update_sq_stats(struct otx2_nic *pfvf, int qidx); void otx2_set_ethtool_ops(struct net_device *netdev); +void otx2vf_set_ethtool_ops(struct net_device *netdev); int otx2_open(struct net_device *netdev); int otx2_stop(struct net_device *netdev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index f450111423a8..d59f5a9c7273 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -17,6 +17,7 @@ #include "otx2_common.h" #define DRV_NAME "octeontx2-nicpf" +#define DRV_VF_NAME "octeontx2-nicvf" struct otx2_stat { char name[ETH_GSTRING_LEN]; @@ -63,16 +64,6 @@ static const unsigned int otx2_n_dev_stats = ARRAY_SIZE(otx2_dev_stats); static const unsigned int otx2_n_drv_stats = ARRAY_SIZE(otx2_drv_stats); static const unsigned int otx2_n_queue_stats = ARRAY_SIZE(otx2_queue_stats); -static void otx2_dev_open(struct net_device *netdev) -{ - otx2_open(netdev); -} - -static void otx2_dev_stop(struct net_device *netdev) -{ - otx2_stop(netdev); -} - static void otx2_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -232,7 +223,7 @@ static int otx2_set_channels(struct net_device *dev, return -EINVAL; if (if_up) - otx2_dev_stop(dev); + dev->netdev_ops->ndo_stop(dev); err = otx2_set_real_num_queues(dev, channel->tx_count, channel->rx_count); @@ -245,7 +236,7 @@ static int otx2_set_channels(struct net_device *dev, fail: if (if_up) - otx2_dev_open(dev); + dev->netdev_ops->ndo_open(dev); netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n", pfvf->hw.tx_queues, pfvf->hw.rx_queues); @@ -259,6 +250,9 @@ static void otx2_get_pauseparam(struct net_device *netdev, struct otx2_nic *pfvf = netdev_priv(netdev); struct cgx_pause_frm_cfg *req, *rsp; + if (is_otx2_lbkvf(pfvf->pdev)) + return; + req = otx2_mbox_alloc_msg_cgx_cfg_pause_frm(&pfvf->mbox); if (!req) return; @@ -279,6 +273,9 @@ static int otx2_set_pauseparam(struct net_device *netdev, if (pause->autoneg) return -EOPNOTSUPP; + if (is_otx2_lbkvf(pfvf->pdev)) + return -EOPNOTSUPP; + if (pause->rx_pause) pfvf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED; else @@ -336,14 +333,15 @@ static int otx2_set_ringparam(struct net_device *netdev, return 0; if (if_up) - otx2_dev_stop(netdev); + netdev->netdev_ops->ndo_stop(netdev); /* Assigned to the nearest possible exponent. */ qs->sqe_cnt = tx_count; qs->rqe_cnt = rx_count; if (if_up) - otx2_dev_open(netdev); + netdev->netdev_ops->ndo_open(netdev); + return 0; } @@ -368,17 +366,6 @@ static int otx2_set_coalesce(struct net_device *netdev, struct otx2_hw *hw = &pfvf->hw; int qidx; - if (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce || - ec->rx_coalesce_usecs_irq || ec->rx_max_coalesced_frames_irq || - ec->tx_coalesce_usecs_irq || ec->tx_max_coalesced_frames_irq || - ec->stats_block_coalesce_usecs || ec->pkt_rate_low || - ec->rx_coalesce_usecs_low || ec->rx_max_coalesced_frames_low || - ec->tx_coalesce_usecs_low || ec->tx_max_coalesced_frames_low || - ec->pkt_rate_high || ec->rx_coalesce_usecs_high || - ec->rx_max_coalesced_frames_high || ec->tx_coalesce_usecs_high || - ec->tx_max_coalesced_frames_high || ec->rate_sample_interval) - return -EOPNOTSUPP; - if (!ec->rx_max_coalesced_frames || !ec->tx_max_coalesced_frames) return 0; @@ -670,10 +657,15 @@ static u32 otx2_get_link(struct net_device *netdev) { struct otx2_nic *pfvf = netdev_priv(netdev); + /* LBK link is internal and always UP */ + if (is_otx2_lbkvf(pfvf->pdev)) + return 1; return pfvf->linfo.link_up; } static const struct ethtool_ops otx2_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_link = otx2_get_link, .get_drvinfo = otx2_get_drvinfo, .get_strings = otx2_get_strings, @@ -701,3 +693,102 @@ void otx2_set_ethtool_ops(struct net_device *netdev) { netdev->ethtool_ops = &otx2_ethtool_ops; } + +/* VF's ethtool APIs */ +static void otx2vf_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct otx2_nic *vf = netdev_priv(netdev); + + strlcpy(info->driver, DRV_VF_NAME, sizeof(info->driver)); + strlcpy(info->bus_info, pci_name(vf->pdev), sizeof(info->bus_info)); +} + +static void otx2vf_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + struct otx2_nic *vf = netdev_priv(netdev); + int stats; + + if (sset != ETH_SS_STATS) + return; + + for (stats = 0; stats < otx2_n_dev_stats; stats++) { + memcpy(data, otx2_dev_stats[stats].name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + for (stats = 0; stats < otx2_n_drv_stats; stats++) { + memcpy(data, otx2_drv_stats[stats].name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + otx2_get_qset_strings(vf, &data, 0); + + strcpy(data, "reset_count"); + data += ETH_GSTRING_LEN; +} + +static void otx2vf_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct otx2_nic *vf = netdev_priv(netdev); + int stat; + + otx2_get_dev_stats(vf); + for (stat = 0; stat < otx2_n_dev_stats; stat++) + *(data++) = ((u64 *)&vf->hw.dev_stats) + [otx2_dev_stats[stat].index]; + + for (stat = 0; stat < otx2_n_drv_stats; stat++) + *(data++) = atomic_read(&((atomic_t *)&vf->hw.drv_stats) + [otx2_drv_stats[stat].index]); + + otx2_get_qset_stats(vf, stats, &data); + *(data++) = vf->reset_count; +} + +static int otx2vf_get_sset_count(struct net_device *netdev, int sset) +{ + struct otx2_nic *vf = netdev_priv(netdev); + int qstats_count; + + if (sset != ETH_SS_STATS) + return -EINVAL; + + qstats_count = otx2_n_queue_stats * + (vf->hw.rx_queues + vf->hw.tx_queues); + + return otx2_n_dev_stats + otx2_n_drv_stats + qstats_count + 1; +} + +static const struct ethtool_ops otx2vf_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, + .get_link = otx2_get_link, + .get_drvinfo = otx2vf_get_drvinfo, + .get_strings = otx2vf_get_strings, + .get_ethtool_stats = otx2vf_get_ethtool_stats, + .get_sset_count = otx2vf_get_sset_count, + .set_channels = otx2_set_channels, + .get_channels = otx2_get_channels, + .get_rxnfc = otx2_get_rxnfc, + .set_rxnfc = otx2_set_rxnfc, + .get_rxfh_key_size = otx2_get_rxfh_key_size, + .get_rxfh_indir_size = otx2_get_rxfh_indir_size, + .get_rxfh = otx2_get_rxfh, + .set_rxfh = otx2_set_rxfh, + .get_ringparam = otx2_get_ringparam, + .set_ringparam = otx2_set_ringparam, + .get_coalesce = otx2_get_coalesce, + .set_coalesce = otx2_set_coalesce, + .get_msglevel = otx2_get_msglevel, + .set_msglevel = otx2_set_msglevel, + .get_pauseparam = otx2_get_pauseparam, + .set_pauseparam = otx2_set_pauseparam, +}; + +void otx2vf_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &otx2vf_ethtool_ops; +} +EXPORT_SYMBOL(otx2vf_set_ethtool_ops); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 22f9a326fd81..4618c90268ae 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -24,7 +24,6 @@ #define DRV_NAME "octeontx2-nicpf" #define DRV_STRING "Marvell OcteonTX2 NIC Physical Function Driver" -#define DRV_VERSION "1.0" /* Supported devices */ static const struct pci_device_id otx2_pf_id_table[] = { @@ -32,10 +31,9 @@ static const struct pci_device_id otx2_pf_id_table[] = { { 0, } /* end of table */ }; -MODULE_AUTHOR("Marvell International Ltd."); +MODULE_AUTHOR("Sunil Goutham <sgoutham@marvell.com>"); MODULE_DESCRIPTION(DRV_STRING); MODULE_LICENSE("GPL v2"); -MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, otx2_pf_id_table); enum { @@ -61,6 +59,224 @@ static int otx2_change_mtu(struct net_device *netdev, int new_mtu) return err; } +static void otx2_disable_flr_me_intr(struct otx2_nic *pf) +{ + int irq, vfs = pf->total_vfs; + + /* Disable VFs ME interrupts */ + otx2_write64(pf, RVU_PF_VFME_INT_ENA_W1CX(0), INTR_MASK(vfs)); + irq = pci_irq_vector(pf->pdev, RVU_PF_INT_VEC_VFME0); + free_irq(irq, pf); + + /* Disable VFs FLR interrupts */ + otx2_write64(pf, RVU_PF_VFFLR_INT_ENA_W1CX(0), INTR_MASK(vfs)); + irq = pci_irq_vector(pf->pdev, RVU_PF_INT_VEC_VFFLR0); + free_irq(irq, pf); + + if (vfs <= 64) + return; + + otx2_write64(pf, RVU_PF_VFME_INT_ENA_W1CX(1), INTR_MASK(vfs - 64)); + irq = pci_irq_vector(pf->pdev, RVU_PF_INT_VEC_VFME1); + free_irq(irq, pf); + + otx2_write64(pf, RVU_PF_VFFLR_INT_ENA_W1CX(1), INTR_MASK(vfs - 64)); + irq = pci_irq_vector(pf->pdev, RVU_PF_INT_VEC_VFFLR1); + free_irq(irq, pf); +} + +static void otx2_flr_wq_destroy(struct otx2_nic *pf) +{ + if (!pf->flr_wq) + return; + destroy_workqueue(pf->flr_wq); + pf->flr_wq = NULL; + devm_kfree(pf->dev, pf->flr_wrk); +} + +static void otx2_flr_handler(struct work_struct *work) +{ + struct flr_work *flrwork = container_of(work, struct flr_work, work); + struct otx2_nic *pf = flrwork->pf; + struct mbox *mbox = &pf->mbox; + struct msg_req *req; + int vf, reg = 0; + + vf = flrwork - pf->flr_wrk; + + mutex_lock(&mbox->lock); + req = otx2_mbox_alloc_msg_vf_flr(mbox); + if (!req) { + mutex_unlock(&mbox->lock); + return; + } + req->hdr.pcifunc &= RVU_PFVF_FUNC_MASK; + req->hdr.pcifunc |= (vf + 1) & RVU_PFVF_FUNC_MASK; + + if (!otx2_sync_mbox_msg(&pf->mbox)) { + if (vf >= 64) { + reg = 1; + vf = vf - 64; + } + /* clear transcation pending bit */ + otx2_write64(pf, RVU_PF_VFTRPENDX(reg), BIT_ULL(vf)); + otx2_write64(pf, RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf)); + } + + mutex_unlock(&mbox->lock); +} + +static irqreturn_t otx2_pf_flr_intr_handler(int irq, void *pf_irq) +{ + struct otx2_nic *pf = (struct otx2_nic *)pf_irq; + int reg, dev, vf, start_vf, num_reg = 1; + u64 intr; + + if (pf->total_vfs > 64) + num_reg = 2; + + for (reg = 0; reg < num_reg; reg++) { + intr = otx2_read64(pf, RVU_PF_VFFLR_INTX(reg)); + if (!intr) + continue; + start_vf = 64 * reg; + for (vf = 0; vf < 64; vf++) { + if (!(intr & BIT_ULL(vf))) + continue; + dev = vf + start_vf; + queue_work(pf->flr_wq, &pf->flr_wrk[dev].work); + /* Clear interrupt */ + otx2_write64(pf, RVU_PF_VFFLR_INTX(reg), BIT_ULL(vf)); + /* Disable the interrupt */ + otx2_write64(pf, RVU_PF_VFFLR_INT_ENA_W1CX(reg), + BIT_ULL(vf)); + } + } + return IRQ_HANDLED; +} + +static irqreturn_t otx2_pf_me_intr_handler(int irq, void *pf_irq) +{ + struct otx2_nic *pf = (struct otx2_nic *)pf_irq; + int vf, reg, num_reg = 1; + u64 intr; + + if (pf->total_vfs > 64) + num_reg = 2; + + for (reg = 0; reg < num_reg; reg++) { + intr = otx2_read64(pf, RVU_PF_VFME_INTX(reg)); + if (!intr) + continue; + for (vf = 0; vf < 64; vf++) { + if (!(intr & BIT_ULL(vf))) + continue; + /* clear trpend bit */ + otx2_write64(pf, RVU_PF_VFTRPENDX(reg), BIT_ULL(vf)); + /* clear interrupt */ + otx2_write64(pf, RVU_PF_VFME_INTX(reg), BIT_ULL(vf)); + } + } + return IRQ_HANDLED; +} + +static int otx2_register_flr_me_intr(struct otx2_nic *pf, int numvfs) +{ + struct otx2_hw *hw = &pf->hw; + char *irq_name; + int ret; + + /* Register ME interrupt handler*/ + irq_name = &hw->irq_name[RVU_PF_INT_VEC_VFME0 * NAME_SIZE]; + snprintf(irq_name, NAME_SIZE, "RVUPF%d_ME0", rvu_get_pf(pf->pcifunc)); + ret = request_irq(pci_irq_vector(pf->pdev, RVU_PF_INT_VEC_VFME0), + otx2_pf_me_intr_handler, 0, irq_name, pf); + if (ret) { + dev_err(pf->dev, + "RVUPF: IRQ registration failed for ME0\n"); + } + + /* Register FLR interrupt handler */ + irq_name = &hw->irq_name[RVU_PF_INT_VEC_VFFLR0 * NAME_SIZE]; + snprintf(irq_name, NAME_SIZE, "RVUPF%d_FLR0", rvu_get_pf(pf->pcifunc)); + ret = request_irq(pci_irq_vector(pf->pdev, RVU_PF_INT_VEC_VFFLR0), + otx2_pf_flr_intr_handler, 0, irq_name, pf); + if (ret) { + dev_err(pf->dev, + "RVUPF: IRQ registration failed for FLR0\n"); + return ret; + } + + if (numvfs > 64) { + irq_name = &hw->irq_name[RVU_PF_INT_VEC_VFME1 * NAME_SIZE]; + snprintf(irq_name, NAME_SIZE, "RVUPF%d_ME1", + rvu_get_pf(pf->pcifunc)); + ret = request_irq(pci_irq_vector + (pf->pdev, RVU_PF_INT_VEC_VFME1), + otx2_pf_me_intr_handler, 0, irq_name, pf); + if (ret) { + dev_err(pf->dev, + "RVUPF: IRQ registration failed for ME1\n"); + } + irq_name = &hw->irq_name[RVU_PF_INT_VEC_VFFLR1 * NAME_SIZE]; + snprintf(irq_name, NAME_SIZE, "RVUPF%d_FLR1", + rvu_get_pf(pf->pcifunc)); + ret = request_irq(pci_irq_vector + (pf->pdev, RVU_PF_INT_VEC_VFFLR1), + otx2_pf_flr_intr_handler, 0, irq_name, pf); + if (ret) { + dev_err(pf->dev, + "RVUPF: IRQ registration failed for FLR1\n"); + return ret; + } + } + + /* Enable ME interrupt for all VFs*/ + otx2_write64(pf, RVU_PF_VFME_INTX(0), INTR_MASK(numvfs)); + otx2_write64(pf, RVU_PF_VFME_INT_ENA_W1SX(0), INTR_MASK(numvfs)); + + /* Enable FLR interrupt for all VFs*/ + otx2_write64(pf, RVU_PF_VFFLR_INTX(0), INTR_MASK(numvfs)); + otx2_write64(pf, RVU_PF_VFFLR_INT_ENA_W1SX(0), INTR_MASK(numvfs)); + + if (numvfs > 64) { + numvfs -= 64; + + otx2_write64(pf, RVU_PF_VFME_INTX(1), INTR_MASK(numvfs)); + otx2_write64(pf, RVU_PF_VFME_INT_ENA_W1SX(1), + INTR_MASK(numvfs)); + + otx2_write64(pf, RVU_PF_VFFLR_INTX(1), INTR_MASK(numvfs)); + otx2_write64(pf, RVU_PF_VFFLR_INT_ENA_W1SX(1), + INTR_MASK(numvfs)); + } + return 0; +} + +static int otx2_pf_flr_init(struct otx2_nic *pf, int num_vfs) +{ + int vf; + + pf->flr_wq = alloc_workqueue("otx2_pf_flr_wq", + WQ_UNBOUND | WQ_HIGHPRI, 1); + if (!pf->flr_wq) + return -ENOMEM; + + pf->flr_wrk = devm_kcalloc(pf->dev, num_vfs, + sizeof(struct flr_work), GFP_KERNEL); + if (!pf->flr_wrk) { + destroy_workqueue(pf->flr_wq); + return -ENOMEM; + } + + for (vf = 0; vf < num_vfs; vf++) { + pf->flr_wrk[vf].pf = pf; + INIT_WORK(&pf->flr_wrk[vf].work, otx2_flr_handler); + } + + return 0; +} + static void otx2_queue_work(struct mbox *mw, struct workqueue_struct *mbox_wq, int first, int mdevs, u64 intr, int type) { @@ -115,9 +331,391 @@ static void otx2_queue_work(struct mbox *mw, struct workqueue_struct *mbox_wq, } } +static void otx2_forward_msg_pfvf(struct otx2_mbox_dev *mdev, + struct otx2_mbox *pfvf_mbox, void *bbuf_base, + int devid) +{ + struct otx2_mbox_dev *src_mdev = mdev; + int offset; + + /* Msgs are already copied, trigger VF's mbox irq */ + smp_wmb(); + + offset = pfvf_mbox->trigger | (devid << pfvf_mbox->tr_shift); + writeq(1, (void __iomem *)pfvf_mbox->reg_base + offset); + + /* Restore VF's mbox bounce buffer region address */ + src_mdev->mbase = bbuf_base; +} + +static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf, + struct otx2_mbox *src_mbox, + int dir, int vf, int num_msgs) +{ + struct otx2_mbox_dev *src_mdev, *dst_mdev; + struct mbox_hdr *mbox_hdr; + struct mbox_hdr *req_hdr; + struct mbox *dst_mbox; + int dst_size, err; + + if (dir == MBOX_DIR_PFAF) { + /* Set VF's mailbox memory as PF's bounce buffer memory, so + * that explicit copying of VF's msgs to PF=>AF mbox region + * and AF=>PF responses to VF's mbox region can be avoided. + */ + src_mdev = &src_mbox->dev[vf]; + mbox_hdr = src_mbox->hwbase + + src_mbox->rx_start + (vf * MBOX_SIZE); + + dst_mbox = &pf->mbox; + dst_size = dst_mbox->mbox.tx_size - + ALIGN(sizeof(*mbox_hdr), MBOX_MSG_ALIGN); + /* Check if msgs fit into destination area */ + if (mbox_hdr->msg_size > dst_size) + return -EINVAL; + + dst_mdev = &dst_mbox->mbox.dev[0]; + + mutex_lock(&pf->mbox.lock); + dst_mdev->mbase = src_mdev->mbase; + dst_mdev->msg_size = mbox_hdr->msg_size; + dst_mdev->num_msgs = num_msgs; + err = otx2_sync_mbox_msg(dst_mbox); + if (err) { + dev_warn(pf->dev, + "AF not responding to VF%d messages\n", vf); + /* restore PF mbase and exit */ + dst_mdev->mbase = pf->mbox.bbuf_base; + mutex_unlock(&pf->mbox.lock); + return err; + } + /* At this point, all the VF messages sent to AF are acked + * with proper responses and responses are copied to VF + * mailbox hence raise interrupt to VF. + */ + req_hdr = (struct mbox_hdr *)(dst_mdev->mbase + + dst_mbox->mbox.rx_start); + req_hdr->num_msgs = num_msgs; + + otx2_forward_msg_pfvf(dst_mdev, &pf->mbox_pfvf[0].mbox, + pf->mbox.bbuf_base, vf); + mutex_unlock(&pf->mbox.lock); + } else if (dir == MBOX_DIR_PFVF_UP) { + src_mdev = &src_mbox->dev[0]; + mbox_hdr = src_mbox->hwbase + src_mbox->rx_start; + req_hdr = (struct mbox_hdr *)(src_mdev->mbase + + src_mbox->rx_start); + req_hdr->num_msgs = num_msgs; + + dst_mbox = &pf->mbox_pfvf[0]; + dst_size = dst_mbox->mbox_up.tx_size - + ALIGN(sizeof(*mbox_hdr), MBOX_MSG_ALIGN); + /* Check if msgs fit into destination area */ + if (mbox_hdr->msg_size > dst_size) + return -EINVAL; + + dst_mdev = &dst_mbox->mbox_up.dev[vf]; + dst_mdev->mbase = src_mdev->mbase; + dst_mdev->msg_size = mbox_hdr->msg_size; + dst_mdev->num_msgs = mbox_hdr->num_msgs; + err = otx2_sync_mbox_up_msg(dst_mbox, vf); + if (err) { + dev_warn(pf->dev, + "VF%d is not responding to mailbox\n", vf); + return err; + } + } else if (dir == MBOX_DIR_VFPF_UP) { + req_hdr = (struct mbox_hdr *)(src_mbox->dev[0].mbase + + src_mbox->rx_start); + req_hdr->num_msgs = num_msgs; + otx2_forward_msg_pfvf(&pf->mbox_pfvf->mbox_up.dev[vf], + &pf->mbox.mbox_up, + pf->mbox_pfvf[vf].bbuf_base, + 0); + } + + return 0; +} + +static void otx2_pfvf_mbox_handler(struct work_struct *work) +{ + struct mbox_msghdr *msg = NULL; + int offset, vf_idx, id, err; + struct otx2_mbox_dev *mdev; + struct mbox_hdr *req_hdr; + struct otx2_mbox *mbox; + struct mbox *vf_mbox; + struct otx2_nic *pf; + + vf_mbox = container_of(work, struct mbox, mbox_wrk); + pf = vf_mbox->pfvf; + vf_idx = vf_mbox - pf->mbox_pfvf; + + mbox = &pf->mbox_pfvf[0].mbox; + mdev = &mbox->dev[vf_idx]; + req_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + + offset = ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN); + + for (id = 0; id < vf_mbox->num_msgs; id++) { + msg = (struct mbox_msghdr *)(mdev->mbase + mbox->rx_start + + offset); + + if (msg->sig != OTX2_MBOX_REQ_SIG) + goto inval_msg; + + /* Set VF's number in each of the msg */ + msg->pcifunc &= RVU_PFVF_FUNC_MASK; + msg->pcifunc |= (vf_idx + 1) & RVU_PFVF_FUNC_MASK; + offset = msg->next_msgoff; + } + err = otx2_forward_vf_mbox_msgs(pf, mbox, MBOX_DIR_PFAF, vf_idx, + vf_mbox->num_msgs); + if (err) + goto inval_msg; + return; + +inval_msg: + otx2_reply_invalid_msg(mbox, vf_idx, 0, msg->id); + otx2_mbox_msg_send(mbox, vf_idx); +} + +static void otx2_pfvf_mbox_up_handler(struct work_struct *work) +{ + struct mbox *vf_mbox = container_of(work, struct mbox, mbox_up_wrk); + struct otx2_nic *pf = vf_mbox->pfvf; + struct otx2_mbox_dev *mdev; + int offset, id, vf_idx = 0; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + struct otx2_mbox *mbox; + + vf_idx = vf_mbox - pf->mbox_pfvf; + mbox = &pf->mbox_pfvf[0].mbox_up; + mdev = &mbox->dev[vf_idx]; + + rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + + for (id = 0; id < vf_mbox->up_num_msgs; id++) { + msg = mdev->mbase + offset; + + if (msg->id >= MBOX_MSG_MAX) { + dev_err(pf->dev, + "Mbox msg with unknown ID 0x%x\n", msg->id); + goto end; + } + + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(pf->dev, + "Mbox msg with wrong signature %x, ID 0x%x\n", + msg->sig, msg->id); + goto end; + } + + switch (msg->id) { + case MBOX_MSG_CGX_LINK_EVENT: + break; + default: + if (msg->rc) + dev_err(pf->dev, + "Mbox msg response has err %d, ID 0x%x\n", + msg->rc, msg->id); + break; + } + +end: + offset = mbox->rx_start + msg->next_msgoff; + mdev->msgs_acked++; + } + + otx2_mbox_reset(mbox, vf_idx); +} + +static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) +{ + struct otx2_nic *pf = (struct otx2_nic *)(pf_irq); + int vfs = pf->total_vfs; + struct mbox *mbox; + u64 intr; + + mbox = pf->mbox_pfvf; + /* Handle VF interrupts */ + if (vfs > 64) { + intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(1)); + otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(1), intr); + otx2_queue_work(mbox, pf->mbox_pfvf_wq, 64, vfs, intr, + TYPE_PFVF); + vfs -= 64; + } + + intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(0)); + otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(0), intr); + + otx2_queue_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr, TYPE_PFVF); + + return IRQ_HANDLED; +} + +static int otx2_pfvf_mbox_init(struct otx2_nic *pf, int numvfs) +{ + void __iomem *hwbase; + struct mbox *mbox; + int err, vf; + u64 base; + + if (!numvfs) + return -EINVAL; + + pf->mbox_pfvf = devm_kcalloc(&pf->pdev->dev, numvfs, + sizeof(struct mbox), GFP_KERNEL); + if (!pf->mbox_pfvf) + return -ENOMEM; + + pf->mbox_pfvf_wq = alloc_workqueue("otx2_pfvf_mailbox", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, 1); + if (!pf->mbox_pfvf_wq) + return -ENOMEM; + + base = readq((void __iomem *)((u64)pf->reg_base + RVU_PF_VF_BAR4_ADDR)); + hwbase = ioremap_wc(base, MBOX_SIZE * pf->total_vfs); + + if (!hwbase) { + err = -ENOMEM; + goto free_wq; + } + + mbox = &pf->mbox_pfvf[0]; + err = otx2_mbox_init(&mbox->mbox, hwbase, pf->pdev, pf->reg_base, + MBOX_DIR_PFVF, numvfs); + if (err) + goto free_iomem; + + err = otx2_mbox_init(&mbox->mbox_up, hwbase, pf->pdev, pf->reg_base, + MBOX_DIR_PFVF_UP, numvfs); + if (err) + goto free_iomem; + + for (vf = 0; vf < numvfs; vf++) { + mbox->pfvf = pf; + INIT_WORK(&mbox->mbox_wrk, otx2_pfvf_mbox_handler); + INIT_WORK(&mbox->mbox_up_wrk, otx2_pfvf_mbox_up_handler); + mbox++; + } + + return 0; + +free_iomem: + if (hwbase) + iounmap(hwbase); +free_wq: + destroy_workqueue(pf->mbox_pfvf_wq); + return err; +} + +static void otx2_pfvf_mbox_destroy(struct otx2_nic *pf) +{ + struct mbox *mbox = &pf->mbox_pfvf[0]; + + if (!mbox) + return; + + if (pf->mbox_pfvf_wq) { + destroy_workqueue(pf->mbox_pfvf_wq); + pf->mbox_pfvf_wq = NULL; + } + + if (mbox->mbox.hwbase) + iounmap(mbox->mbox.hwbase); + + otx2_mbox_destroy(&mbox->mbox); +} + +static void otx2_enable_pfvf_mbox_intr(struct otx2_nic *pf, int numvfs) +{ + /* Clear PF <=> VF mailbox IRQ */ + otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(0), ~0ull); + otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(1), ~0ull); + + /* Enable PF <=> VF mailbox IRQ */ + otx2_write64(pf, RVU_PF_VFPF_MBOX_INT_ENA_W1SX(0), INTR_MASK(numvfs)); + if (numvfs > 64) { + numvfs -= 64; + otx2_write64(pf, RVU_PF_VFPF_MBOX_INT_ENA_W1SX(1), + INTR_MASK(numvfs)); + } +} + +static void otx2_disable_pfvf_mbox_intr(struct otx2_nic *pf, int numvfs) +{ + int vector; + + /* Disable PF <=> VF mailbox IRQ */ + otx2_write64(pf, RVU_PF_VFPF_MBOX_INT_ENA_W1CX(0), ~0ull); + otx2_write64(pf, RVU_PF_VFPF_MBOX_INT_ENA_W1CX(1), ~0ull); + + otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(0), ~0ull); + vector = pci_irq_vector(pf->pdev, RVU_PF_INT_VEC_VFPF_MBOX0); + free_irq(vector, pf); + + if (numvfs > 64) { + otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(1), ~0ull); + vector = pci_irq_vector(pf->pdev, RVU_PF_INT_VEC_VFPF_MBOX1); + free_irq(vector, pf); + } +} + +static int otx2_register_pfvf_mbox_intr(struct otx2_nic *pf, int numvfs) +{ + struct otx2_hw *hw = &pf->hw; + char *irq_name; + int err; + + /* Register MBOX0 interrupt handler */ + irq_name = &hw->irq_name[RVU_PF_INT_VEC_VFPF_MBOX0 * NAME_SIZE]; + if (pf->pcifunc) + snprintf(irq_name, NAME_SIZE, + "RVUPF%d_VF Mbox0", rvu_get_pf(pf->pcifunc)); + else + snprintf(irq_name, NAME_SIZE, "RVUPF_VF Mbox0"); + err = request_irq(pci_irq_vector(pf->pdev, RVU_PF_INT_VEC_VFPF_MBOX0), + otx2_pfvf_mbox_intr_handler, 0, irq_name, pf); + if (err) { + dev_err(pf->dev, + "RVUPF: IRQ registration failed for PFVF mbox0 irq\n"); + return err; + } + + if (numvfs > 64) { + /* Register MBOX1 interrupt handler */ + irq_name = &hw->irq_name[RVU_PF_INT_VEC_VFPF_MBOX1 * NAME_SIZE]; + if (pf->pcifunc) + snprintf(irq_name, NAME_SIZE, + "RVUPF%d_VF Mbox1", rvu_get_pf(pf->pcifunc)); + else + snprintf(irq_name, NAME_SIZE, "RVUPF_VF Mbox1"); + err = request_irq(pci_irq_vector(pf->pdev, + RVU_PF_INT_VEC_VFPF_MBOX1), + otx2_pfvf_mbox_intr_handler, + 0, irq_name, pf); + if (err) { + dev_err(pf->dev, + "RVUPF: IRQ registration failed for PFVF mbox1 irq\n"); + return err; + } + } + + otx2_enable_pfvf_mbox_intr(pf, numvfs); + + return 0; +} + static void otx2_process_pfaf_mbox_msg(struct otx2_nic *pf, struct mbox_msghdr *msg) { + int devid; + if (msg->id >= MBOX_MSG_MAX) { dev_err(pf->dev, "Mbox msg with unknown ID 0x%x\n", msg->id); @@ -131,6 +729,26 @@ static void otx2_process_pfaf_mbox_msg(struct otx2_nic *pf, return; } + /* message response heading VF */ + devid = msg->pcifunc & RVU_PFVF_FUNC_MASK; + if (devid) { + struct otx2_vf_config *config = &pf->vf_configs[devid - 1]; + struct delayed_work *dwork; + + switch (msg->id) { + case MBOX_MSG_NIX_LF_START_RX: + config->intf_down = false; + dwork = &config->link_event_work; + schedule_delayed_work(dwork, msecs_to_jiffies(100)); + break; + case MBOX_MSG_NIX_LF_STOP_RX: + config->intf_down = true; + break; + } + + return; + } + switch (msg->id) { case MBOX_MSG_READY: pf->pcifunc = msg->pcifunc; @@ -212,9 +830,22 @@ int otx2_mbox_up_handler_cgx_link_event(struct otx2_nic *pf, struct cgx_link_info_msg *msg, struct msg_rsp *rsp) { + int i; + /* Copy the link info sent by AF */ pf->linfo = msg->link_info; + /* notify VFs about link event */ + for (i = 0; i < pci_num_vf(pf->pdev); i++) { + struct otx2_vf_config *config = &pf->vf_configs[i]; + struct delayed_work *dwork = &config->link_event_work; + + if (config->intf_down) + continue; + + schedule_delayed_work(dwork, msecs_to_jiffies(100)); + } + /* interface has not been fully configured yet */ if (pf->flags & OTX2_FLAG_INTF_DOWN) return 0; @@ -286,6 +917,12 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) otx2_process_mbox_msg_up(pf, msg); offset = mbox->rx_start + msg->next_msgoff; } + if (devid) { + otx2_forward_vf_mbox_msgs(pf, &pf->mbox.mbox_up, + MBOX_DIR_PFVF_UP, devid - 1, + af_mbox->up_num_msgs); + return; + } otx2_mbox_msg_send(mbox, 0); } @@ -362,7 +999,6 @@ static void otx2_pfaf_mbox_destroy(struct otx2_nic *pf) struct mbox *mbox = &pf->mbox; if (pf->mbox_wq) { - flush_workqueue(pf->mbox_wq); destroy_workqueue(pf->mbox_wq); pf->mbox_wq = NULL; } @@ -415,7 +1051,7 @@ static int otx2_pfaf_mbox_init(struct otx2_nic *pf) INIT_WORK(&mbox->mbox_wrk, otx2_pfaf_mbox_handler); INIT_WORK(&mbox->mbox_up_wrk, otx2_pfaf_mbox_up_handler); - otx2_mbox_lock_init(&pf->mbox); + mutex_init(&mbox->lock); return 0; exit: @@ -428,19 +1064,19 @@ static int otx2_cgx_config_linkevents(struct otx2_nic *pf, bool enable) struct msg_req *msg; int err; - otx2_mbox_lock(&pf->mbox); + mutex_lock(&pf->mbox.lock); if (enable) msg = otx2_mbox_alloc_msg_cgx_start_linkevents(&pf->mbox); else msg = otx2_mbox_alloc_msg_cgx_stop_linkevents(&pf->mbox); if (!msg) { - otx2_mbox_unlock(&pf->mbox); + mutex_unlock(&pf->mbox.lock); return -ENOMEM; } err = otx2_sync_mbox_msg(&pf->mbox); - otx2_mbox_unlock(&pf->mbox); + mutex_unlock(&pf->mbox.lock); return err; } @@ -449,19 +1085,19 @@ static int otx2_cgx_config_loopback(struct otx2_nic *pf, bool enable) struct msg_req *msg; int err; - otx2_mbox_lock(&pf->mbox); + mutex_lock(&pf->mbox.lock); if (enable) msg = otx2_mbox_alloc_msg_cgx_intlbk_enable(&pf->mbox); else msg = otx2_mbox_alloc_msg_cgx_intlbk_disable(&pf->mbox); if (!msg) { - otx2_mbox_unlock(&pf->mbox); + mutex_unlock(&pf->mbox.lock); return -ENOMEM; } err = otx2_sync_mbox_msg(&pf->mbox); - otx2_mbox_unlock(&pf->mbox); + mutex_unlock(&pf->mbox.lock); return err; } @@ -483,6 +1119,7 @@ int otx2_set_real_num_queues(struct net_device *netdev, "Failed to set no of Rx queues: %d\n", rx_queues); return err; } +EXPORT_SYMBOL(otx2_set_real_num_queues); static irqreturn_t otx2_q_intr_handler(int irq, void *data) { @@ -646,7 +1283,7 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) /* Get the size of receive buffers to allocate */ pf->rbsize = RCV_FRAG_LEN(pf->netdev->mtu + OTX2_ETH_HLEN); - otx2_mbox_lock(mbox); + mutex_lock(&mbox->lock); /* NPA init */ err = otx2_config_npa(pf); if (err) @@ -663,35 +1300,35 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) /* Init Auras and pools used by NIX RQ, for free buffer ptrs */ err = otx2_rq_aura_pool_init(pf); if (err) { - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); goto err_free_nix_lf; } /* Init Auras and pools used by NIX SQ, for queueing SQEs */ err = otx2_sq_aura_pool_init(pf); if (err) { - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); goto err_free_rq_ptrs; } err = otx2_txsch_alloc(pf); if (err) { - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); goto err_free_sq_ptrs; } err = otx2_config_nix_queues(pf); if (err) { - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); goto err_free_txsch; } for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { err = otx2_txschq_config(pf, lvl); if (err) { - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); goto err_free_nix_queues; } } - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); return err; err_free_nix_queues: @@ -709,7 +1346,7 @@ err_free_rq_ptrs: otx2_ctx_disable(mbox, NPA_AQ_CTYPE_AURA, true); otx2_aura_pool_free(pf); err_free_nix_lf: - otx2_mbox_lock(mbox); + mutex_lock(&mbox->lock); req = otx2_mbox_alloc_msg_nix_lf_free(mbox); if (req) { if (otx2_sync_mbox_msg(mbox)) @@ -723,7 +1360,7 @@ err_free_npa_lf: dev_err(pf->dev, "%s failed to free npalf\n", __func__); } exit: - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); return err; } @@ -743,11 +1380,11 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) if (err) dev_err(pf->dev, "RVUPF: Failed to stop/free TX schedulers\n"); - otx2_mbox_lock(mbox); + mutex_lock(&mbox->lock); /* Disable backpressure */ if (!(pf->pcifunc & RVU_PFVF_FUNC_MASK)) otx2_nix_config_bp(pf, false); - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); /* Disable RQs */ otx2_ctx_disable(mbox, NIX_AQ_CTYPE_RQ, false); @@ -768,28 +1405,28 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) otx2_free_cq_res(pf); - otx2_mbox_lock(mbox); + mutex_lock(&mbox->lock); /* Reset NIX LF */ req = otx2_mbox_alloc_msg_nix_lf_free(mbox); if (req) { if (otx2_sync_mbox_msg(mbox)) dev_err(pf->dev, "%s failed to free nixlf\n", __func__); } - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); /* Disable NPA Pool and Aura hw context */ otx2_ctx_disable(mbox, NPA_AQ_CTYPE_POOL, true); otx2_ctx_disable(mbox, NPA_AQ_CTYPE_AURA, true); otx2_aura_pool_free(pf); - otx2_mbox_lock(mbox); + mutex_lock(&mbox->lock); /* Reset NPA LF */ req = otx2_mbox_alloc_msg_npa_lf_free(mbox); if (req) { if (otx2_sync_mbox_msg(mbox)) dev_err(pf->dev, "%s failed to free npalf\n", __func__); } - otx2_mbox_unlock(mbox); + mutex_unlock(&mbox->lock); } int otx2_open(struct net_device *netdev) @@ -918,6 +1555,9 @@ int otx2_open(struct net_device *netdev) if (pf->linfo.link_up && !(pf->pcifunc & RVU_PFVF_FUNC_MASK)) otx2_handle_link_event(pf); + /* Restore pause frame settings */ + otx2_config_pause_frm(pf); + err = otx2_rxtx_enable(pf, true); if (err) goto err_free_cints; @@ -941,6 +1581,7 @@ err_free_mem: kfree(qset->napi); return err; } +EXPORT_SYMBOL(otx2_open); int otx2_stop(struct net_device *netdev) { @@ -1001,6 +1642,7 @@ int otx2_stop(struct net_device *netdev) sizeof(*qset) - offsetof(struct otx2_qset, sqe_cnt)); return 0; } +EXPORT_SYMBOL(otx2_stop); static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) { @@ -1042,10 +1684,10 @@ static void otx2_set_rx_mode(struct net_device *netdev) if (!(netdev->flags & IFF_UP)) return; - otx2_mbox_lock(&pf->mbox); + mutex_lock(&pf->mbox.lock); req = otx2_mbox_alloc_msg_nix_set_rx_mode(&pf->mbox); if (!req) { - otx2_mbox_unlock(&pf->mbox); + mutex_unlock(&pf->mbox.lock); return; } @@ -1058,7 +1700,7 @@ static void otx2_set_rx_mode(struct net_device *netdev) req->mode |= NIX_RX_MODE_ALLMULTI; otx2_sync_mbox_msg(&pf->mbox); - otx2_mbox_unlock(&pf->mbox); + mutex_unlock(&pf->mbox.lock); } static int otx2_set_features(struct net_device *netdev, @@ -1129,7 +1771,6 @@ static int otx2_realloc_msix_vectors(struct otx2_nic *pf) otx2_disable_mbox_intr(pf); pci_free_irq_vectors(hw->pdev); - pci_free_irq_vectors(hw->pdev); err = pci_alloc_irq_vectors(hw->pdev, num_vec, num_vec, PCI_IRQ_MSIX); if (err < 0) { dev_err(pf->dev, "%s: Failed to realloc %d IRQ vectors\n", @@ -1184,6 +1825,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) pf->netdev = netdev; pf->pdev = pdev; pf->dev = dev; + pf->total_vfs = pci_sriov_get_totalvfs(pdev); pf->flags |= OTX2_FLAG_INTF_DOWN; hw = &pf->hw; @@ -1295,6 +1937,10 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Enable link notifications */ otx2_cgx_config_linkevents(pf, true); + /* Enable pause frames by default */ + pf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED; + pf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED; + return 0; err_detach_rsrc: @@ -1313,6 +1959,121 @@ err_release_regions: return err; } +static void otx2_vf_link_event_task(struct work_struct *work) +{ + struct otx2_vf_config *config; + struct cgx_link_info_msg *req; + struct mbox_msghdr *msghdr; + struct otx2_nic *pf; + int vf_idx; + + config = container_of(work, struct otx2_vf_config, + link_event_work.work); + vf_idx = config - config->pf->vf_configs; + pf = config->pf; + + msghdr = otx2_mbox_alloc_msg_rsp(&pf->mbox_pfvf[0].mbox_up, vf_idx, + sizeof(*req), sizeof(struct msg_rsp)); + if (!msghdr) { + dev_err(pf->dev, "Failed to create VF%d link event\n", vf_idx); + return; + } + + req = (struct cgx_link_info_msg *)msghdr; + req->hdr.id = MBOX_MSG_CGX_LINK_EVENT; + req->hdr.sig = OTX2_MBOX_REQ_SIG; + memcpy(&req->link_info, &pf->linfo, sizeof(req->link_info)); + + otx2_sync_mbox_up_msg(&pf->mbox_pfvf[0], vf_idx); +} + +static int otx2_sriov_enable(struct pci_dev *pdev, int numvfs) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct otx2_nic *pf = netdev_priv(netdev); + int ret, i; + + /* Init PF <=> VF mailbox stuff */ + ret = otx2_pfvf_mbox_init(pf, numvfs); + if (ret) + return ret; + + ret = otx2_register_pfvf_mbox_intr(pf, numvfs); + if (ret) + goto free_mbox; + + pf->vf_configs = kcalloc(numvfs, sizeof(struct otx2_vf_config), + GFP_KERNEL); + if (!pf->vf_configs) { + ret = -ENOMEM; + goto free_intr; + } + + for (i = 0; i < numvfs; i++) { + pf->vf_configs[i].pf = pf; + pf->vf_configs[i].intf_down = true; + INIT_DELAYED_WORK(&pf->vf_configs[i].link_event_work, + otx2_vf_link_event_task); + } + + ret = otx2_pf_flr_init(pf, numvfs); + if (ret) + goto free_configs; + + ret = otx2_register_flr_me_intr(pf, numvfs); + if (ret) + goto free_flr; + + ret = pci_enable_sriov(pdev, numvfs); + if (ret) + goto free_flr_intr; + + return numvfs; +free_flr_intr: + otx2_disable_flr_me_intr(pf); +free_flr: + otx2_flr_wq_destroy(pf); +free_configs: + kfree(pf->vf_configs); +free_intr: + otx2_disable_pfvf_mbox_intr(pf, numvfs); +free_mbox: + otx2_pfvf_mbox_destroy(pf); + return ret; +} + +static int otx2_sriov_disable(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct otx2_nic *pf = netdev_priv(netdev); + int numvfs = pci_num_vf(pdev); + int i; + + if (!numvfs) + return 0; + + pci_disable_sriov(pdev); + + for (i = 0; i < pci_num_vf(pdev); i++) + cancel_delayed_work_sync(&pf->vf_configs[i].link_event_work); + kfree(pf->vf_configs); + + otx2_disable_flr_me_intr(pf); + otx2_flr_wq_destroy(pf); + otx2_disable_pfvf_mbox_intr(pf, numvfs); + otx2_pfvf_mbox_destroy(pf); + + return 0; +} + +static int otx2_sriov_configure(struct pci_dev *pdev, int numvfs) +{ + if (numvfs == 0) + return otx2_sriov_disable(pdev); + else + return otx2_sriov_enable(pdev, numvfs); +} + static void otx2_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); @@ -1327,6 +2088,8 @@ static void otx2_remove(struct pci_dev *pdev) otx2_cgx_config_linkevents(pf, false); unregister_netdev(netdev); + otx2_sriov_disable(pf->pdev); + otx2_detach_resources(&pf->mbox); otx2_disable_mbox_intr(pf); otx2_pfaf_mbox_destroy(pf); @@ -1343,6 +2106,7 @@ static struct pci_driver otx2_pf_driver = { .probe = otx2_probe, .shutdown = otx2_remove, .remove = otx2_remove, + .sriov_configure = otx2_sriov_configure }; static int __init otx2_rvupf_init_module(void) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h index 7963d418886a..867f646e0802 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h @@ -45,6 +45,19 @@ #define RVU_PF_MSIX_VECX_CTL(a) (0x008 | (a) << 4) #define RVU_PF_MSIX_PBAX(a) (0xF0000 | (a) << 3) +/* RVU VF registers */ +#define RVU_VF_VFPF_MBOX0 (0x00000) +#define RVU_VF_VFPF_MBOX1 (0x00008) +#define RVU_VF_VFPF_MBOXX(a) (0x00 | (a) << 3) +#define RVU_VF_INT (0x20) +#define RVU_VF_INT_W1S (0x28) +#define RVU_VF_INT_ENA_W1S (0x30) +#define RVU_VF_INT_ENA_W1C (0x38) +#define RVU_VF_BLOCK_ADDRX_DISC(a) (0x200 | (a) << 3) +#define RVU_VF_MSIX_VECX_ADDR(a) (0x000 | (a) << 4) +#define RVU_VF_MSIX_VECX_CTL(a) (0x008 | (a) << 4) +#define RVU_VF_MSIX_PBAX(a) (0xF0000 | (a) << 3) + #define RVU_FUNC_BLKADDR_SHIFT 20 #define RVU_FUNC_BLKADDR_MASK 0x1FULL diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index bef4c20fe314..94044a5c01e7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -138,6 +138,25 @@ static void otx2_set_rxhash(struct otx2_nic *pfvf, skb_set_hash(skb, hash, hash_type); } +static void otx2_free_rcv_seg(struct otx2_nic *pfvf, struct nix_cqe_rx_s *cqe, + int qidx) +{ + struct nix_rx_sg_s *sg = &cqe->sg; + void *end, *start; + u64 *seg_addr; + int seg; + + start = (void *)sg; + end = start + ((cqe->parse.desc_sizem1 + 1) * 16); + while (start < end) { + sg = (struct nix_rx_sg_s *)start; + seg_addr = &sg->seg_addr; + for (seg = 0; seg < sg->segs; seg++, seg_addr++) + otx2_aura_freeptr(pfvf, qidx, *seg_addr & ~0x07ULL); + start += sizeof(*sg); + } +} + static bool otx2_check_rcv_errors(struct otx2_nic *pfvf, struct nix_cqe_rx_s *cqe, int qidx) { @@ -189,16 +208,17 @@ static bool otx2_check_rcv_errors(struct otx2_nic *pfvf, /* For now ignore all the NPC parser errors and * pass the packets to stack. */ - return false; + if (cqe->sg.segs == 1) + return false; } /* If RXALL is enabled pass on packets to stack. */ - if (cqe->sg.segs && (pfvf->netdev->features & NETIF_F_RXALL)) + if (cqe->sg.segs == 1 && (pfvf->netdev->features & NETIF_F_RXALL)) return false; /* Free buffer back to pool */ if (cqe->sg.segs) - otx2_aura_freeptr(pfvf, qidx, cqe->sg.seg_addr & ~0x07ULL); + otx2_free_rcv_seg(pfvf, cqe, qidx); return true; } @@ -210,7 +230,7 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, struct nix_rx_parse_s *parse = &cqe->parse; struct sk_buff *skb = NULL; - if (unlikely(parse->errlev || parse->errcode)) { + if (unlikely(parse->errlev || parse->errcode || cqe->sg.segs > 1)) { if (otx2_check_rcv_errors(pfvf, cqe, cq->cq_idx)) return; } @@ -778,6 +798,7 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, return true; } +EXPORT_SYMBOL(otx2_sq_append_skb); void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) { @@ -788,11 +809,15 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) while ((cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq))) { if (!cqe->sg.subdc) continue; + processed_cqe++; + if (cqe->sg.segs > 1) { + otx2_free_rcv_seg(pfvf, cqe, cq->cq_idx); + continue; + } iova = cqe->sg.seg_addr - OTX2_HEAD_ROOM; pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, DMA_FROM_DEVICE); put_page(virt_to_page(phys_to_virt(pa))); - processed_cqe++; } /* Free CQEs to HW */ @@ -831,18 +856,18 @@ int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable) struct msg_req *msg; int err; - otx2_mbox_lock(&pfvf->mbox); + mutex_lock(&pfvf->mbox.lock); if (enable) msg = otx2_mbox_alloc_msg_nix_lf_start_rx(&pfvf->mbox); else msg = otx2_mbox_alloc_msg_nix_lf_stop_rx(&pfvf->mbox); if (!msg) { - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return -ENOMEM; } err = otx2_sync_mbox_msg(&pfvf->mbox); - otx2_mbox_unlock(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); return err; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c new file mode 100644 index 000000000000..187c633a7af5 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -0,0 +1,648 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Virtual Function ethernet driver */ + +#include <linux/etherdevice.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "otx2_common.h" +#include "otx2_reg.h" + +#define DRV_NAME "octeontx2-nicvf" +#define DRV_STRING "Marvell OcteonTX2 NIC Virtual Function Driver" + +static const struct pci_device_id otx2_vf_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_RVU_AFVF) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_RVU_VF) }, + { } +}; + +MODULE_AUTHOR("Sunil Goutham <sgoutham@marvell.com>"); +MODULE_DESCRIPTION(DRV_STRING); +MODULE_LICENSE("GPL v2"); +MODULE_DEVICE_TABLE(pci, otx2_vf_id_table); + +/* RVU VF Interrupt Vector Enumeration */ +enum { + RVU_VF_INT_VEC_MBOX = 0x0, +}; + +static void otx2vf_process_vfaf_mbox_msg(struct otx2_nic *vf, + struct mbox_msghdr *msg) +{ + if (msg->id >= MBOX_MSG_MAX) { + dev_err(vf->dev, + "Mbox msg with unknown ID %d\n", msg->id); + return; + } + + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(vf->dev, + "Mbox msg with wrong signature %x, ID %d\n", + msg->sig, msg->id); + return; + } + + if (msg->rc == MBOX_MSG_INVALID) { + dev_err(vf->dev, + "PF/AF says the sent msg(s) %d were invalid\n", + msg->id); + return; + } + + switch (msg->id) { + case MBOX_MSG_READY: + vf->pcifunc = msg->pcifunc; + break; + case MBOX_MSG_MSIX_OFFSET: + mbox_handler_msix_offset(vf, (struct msix_offset_rsp *)msg); + break; + case MBOX_MSG_NPA_LF_ALLOC: + mbox_handler_npa_lf_alloc(vf, (struct npa_lf_alloc_rsp *)msg); + break; + case MBOX_MSG_NIX_LF_ALLOC: + mbox_handler_nix_lf_alloc(vf, (struct nix_lf_alloc_rsp *)msg); + break; + case MBOX_MSG_NIX_TXSCH_ALLOC: + mbox_handler_nix_txsch_alloc(vf, + (struct nix_txsch_alloc_rsp *)msg); + break; + case MBOX_MSG_NIX_BP_ENABLE: + mbox_handler_nix_bp_enable(vf, (struct nix_bp_cfg_rsp *)msg); + break; + default: + if (msg->rc) + dev_err(vf->dev, + "Mbox msg response has err %d, ID %d\n", + msg->rc, msg->id); + } +} + +static void otx2vf_vfaf_mbox_handler(struct work_struct *work) +{ + struct otx2_mbox_dev *mdev; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + struct otx2_mbox *mbox; + struct mbox *af_mbox; + int offset, id; + + af_mbox = container_of(work, struct mbox, mbox_wrk); + mbox = &af_mbox->mbox; + mdev = &mbox->dev[0]; + rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (af_mbox->num_msgs == 0) + return; + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + + for (id = 0; id < af_mbox->num_msgs; id++) { + msg = (struct mbox_msghdr *)(mdev->mbase + offset); + otx2vf_process_vfaf_mbox_msg(af_mbox->pfvf, msg); + offset = mbox->rx_start + msg->next_msgoff; + mdev->msgs_acked++; + } + + otx2_mbox_reset(mbox, 0); +} + +static int otx2vf_process_mbox_msg_up(struct otx2_nic *vf, + struct mbox_msghdr *req) +{ + struct msg_rsp *rsp; + int err; + + /* Check if valid, if not reply with a invalid msg */ + if (req->sig != OTX2_MBOX_REQ_SIG) { + otx2_reply_invalid_msg(&vf->mbox.mbox_up, 0, 0, req->id); + return -ENODEV; + } + + switch (req->id) { + case MBOX_MSG_CGX_LINK_EVENT: + rsp = (struct msg_rsp *)otx2_mbox_alloc_msg( + &vf->mbox.mbox_up, 0, + sizeof(struct msg_rsp)); + if (!rsp) + return -ENOMEM; + + rsp->hdr.id = MBOX_MSG_CGX_LINK_EVENT; + rsp->hdr.sig = OTX2_MBOX_RSP_SIG; + rsp->hdr.pcifunc = 0; + rsp->hdr.rc = 0; + err = otx2_mbox_up_handler_cgx_link_event( + vf, (struct cgx_link_info_msg *)req, rsp); + return err; + default: + otx2_reply_invalid_msg(&vf->mbox.mbox_up, 0, 0, req->id); + return -ENODEV; + } + return 0; +} + +static void otx2vf_vfaf_mbox_up_handler(struct work_struct *work) +{ + struct otx2_mbox_dev *mdev; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + struct otx2_mbox *mbox; + struct mbox *vf_mbox; + struct otx2_nic *vf; + int offset, id; + + vf_mbox = container_of(work, struct mbox, mbox_up_wrk); + vf = vf_mbox->pfvf; + mbox = &vf_mbox->mbox_up; + mdev = &mbox->dev[0]; + + rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (vf_mbox->up_num_msgs == 0) + return; + + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + + for (id = 0; id < vf_mbox->up_num_msgs; id++) { + msg = (struct mbox_msghdr *)(mdev->mbase + offset); + otx2vf_process_mbox_msg_up(vf, msg); + offset = mbox->rx_start + msg->next_msgoff; + } + + otx2_mbox_msg_send(mbox, 0); +} + +static irqreturn_t otx2vf_vfaf_mbox_intr_handler(int irq, void *vf_irq) +{ + struct otx2_nic *vf = (struct otx2_nic *)vf_irq; + struct otx2_mbox_dev *mdev; + struct otx2_mbox *mbox; + struct mbox_hdr *hdr; + + /* Clear the IRQ */ + otx2_write64(vf, RVU_VF_INT, BIT_ULL(0)); + + /* Read latest mbox data */ + smp_rmb(); + + /* Check for PF => VF response messages */ + mbox = &vf->mbox.mbox; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); + + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) { + vf->mbox.num_msgs = hdr->num_msgs; + hdr->num_msgs = 0; + memset(mbox->hwbase + mbox->rx_start, 0, + ALIGN(sizeof(struct mbox_hdr), sizeof(u64))); + queue_work(vf->mbox_wq, &vf->mbox.mbox_wrk); + } + /* Check for PF => VF notification messages */ + mbox = &vf->mbox.mbox_up; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); + + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) { + vf->mbox.up_num_msgs = hdr->num_msgs; + hdr->num_msgs = 0; + memset(mbox->hwbase + mbox->rx_start, 0, + ALIGN(sizeof(struct mbox_hdr), sizeof(u64))); + queue_work(vf->mbox_wq, &vf->mbox.mbox_up_wrk); + } + + return IRQ_HANDLED; +} + +static void otx2vf_disable_mbox_intr(struct otx2_nic *vf) +{ + int vector = pci_irq_vector(vf->pdev, RVU_VF_INT_VEC_MBOX); + + /* Disable VF => PF mailbox IRQ */ + otx2_write64(vf, RVU_VF_INT_ENA_W1C, BIT_ULL(0)); + free_irq(vector, vf); +} + +static int otx2vf_register_mbox_intr(struct otx2_nic *vf, bool probe_pf) +{ + struct otx2_hw *hw = &vf->hw; + struct msg_req *req; + char *irq_name; + int err; + + /* Register mailbox interrupt handler */ + irq_name = &hw->irq_name[RVU_VF_INT_VEC_MBOX * NAME_SIZE]; + snprintf(irq_name, NAME_SIZE, "RVUVFAF Mbox"); + err = request_irq(pci_irq_vector(vf->pdev, RVU_VF_INT_VEC_MBOX), + otx2vf_vfaf_mbox_intr_handler, 0, irq_name, vf); + if (err) { + dev_err(vf->dev, + "RVUPF: IRQ registration failed for VFAF mbox irq\n"); + return err; + } + + /* Enable mailbox interrupt for msgs coming from PF. + * First clear to avoid spurious interrupts, if any. + */ + otx2_write64(vf, RVU_VF_INT, BIT_ULL(0)); + otx2_write64(vf, RVU_VF_INT_ENA_W1S, BIT_ULL(0)); + + if (!probe_pf) + return 0; + + /* Check mailbox communication with PF */ + req = otx2_mbox_alloc_msg_ready(&vf->mbox); + if (!req) { + otx2vf_disable_mbox_intr(vf); + return -ENOMEM; + } + + err = otx2_sync_mbox_msg(&vf->mbox); + if (err) { + dev_warn(vf->dev, + "AF not responding to mailbox, deferring probe\n"); + otx2vf_disable_mbox_intr(vf); + return -EPROBE_DEFER; + } + return 0; +} + +static void otx2vf_vfaf_mbox_destroy(struct otx2_nic *vf) +{ + struct mbox *mbox = &vf->mbox; + + if (vf->mbox_wq) { + flush_workqueue(vf->mbox_wq); + destroy_workqueue(vf->mbox_wq); + vf->mbox_wq = NULL; + } + + if (mbox->mbox.hwbase) + iounmap((void __iomem *)mbox->mbox.hwbase); + + otx2_mbox_destroy(&mbox->mbox); + otx2_mbox_destroy(&mbox->mbox_up); +} + +static int otx2vf_vfaf_mbox_init(struct otx2_nic *vf) +{ + struct mbox *mbox = &vf->mbox; + void __iomem *hwbase; + int err; + + mbox->pfvf = vf; + vf->mbox_wq = alloc_workqueue("otx2_vfaf_mailbox", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, 1); + if (!vf->mbox_wq) + return -ENOMEM; + + /* Mailbox is a reserved memory (in RAM) region shared between + * admin function (i.e PF0) and this VF, shouldn't be mapped as + * device memory to allow unaligned accesses. + */ + hwbase = ioremap_wc(pci_resource_start(vf->pdev, PCI_MBOX_BAR_NUM), + pci_resource_len(vf->pdev, PCI_MBOX_BAR_NUM)); + if (!hwbase) { + dev_err(vf->dev, "Unable to map VFAF mailbox region\n"); + err = -ENOMEM; + goto exit; + } + + err = otx2_mbox_init(&mbox->mbox, hwbase, vf->pdev, vf->reg_base, + MBOX_DIR_VFPF, 1); + if (err) + goto exit; + + err = otx2_mbox_init(&mbox->mbox_up, hwbase, vf->pdev, vf->reg_base, + MBOX_DIR_VFPF_UP, 1); + if (err) + goto exit; + + err = otx2_mbox_bbuf_init(mbox, vf->pdev); + if (err) + goto exit; + + INIT_WORK(&mbox->mbox_wrk, otx2vf_vfaf_mbox_handler); + INIT_WORK(&mbox->mbox_up_wrk, otx2vf_vfaf_mbox_up_handler); + mutex_init(&mbox->lock); + + return 0; +exit: + destroy_workqueue(vf->mbox_wq); + return err; +} + +static int otx2vf_open(struct net_device *netdev) +{ + struct otx2_nic *vf; + int err; + + err = otx2_open(netdev); + if (err) + return err; + + /* LBKs do not receive link events so tell everyone we are up here */ + vf = netdev_priv(netdev); + if (is_otx2_lbkvf(vf->pdev)) { + pr_info("%s NIC Link is UP\n", netdev->name); + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); + } + + return 0; +} + +static int otx2vf_stop(struct net_device *netdev) +{ + return otx2_stop(netdev); +} + +static netdev_tx_t otx2vf_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct otx2_nic *vf = netdev_priv(netdev); + int qidx = skb_get_queue_mapping(skb); + struct otx2_snd_queue *sq; + struct netdev_queue *txq; + + sq = &vf->qset.sq[qidx]; + txq = netdev_get_tx_queue(netdev, qidx); + + if (!otx2_sq_append_skb(netdev, sq, skb, qidx)) { + netif_tx_stop_queue(txq); + + /* Check again, incase SQBs got freed up */ + smp_mb(); + if (((sq->num_sqbs - *sq->aura_fc_addr) * sq->sqe_per_sqb) + > sq->sqe_thresh) + netif_tx_wake_queue(txq); + + return NETDEV_TX_BUSY; + } + + return NETDEV_TX_OK; +} + +static int otx2vf_change_mtu(struct net_device *netdev, int new_mtu) +{ + bool if_up = netif_running(netdev); + int err = 0; + + if (if_up) + otx2vf_stop(netdev); + + netdev_info(netdev, "Changing MTU from %d to %d\n", + netdev->mtu, new_mtu); + netdev->mtu = new_mtu; + + if (if_up) + err = otx2vf_open(netdev); + + return err; +} + +static void otx2vf_reset_task(struct work_struct *work) +{ + struct otx2_nic *vf = container_of(work, struct otx2_nic, reset_task); + + rtnl_lock(); + + if (netif_running(vf->netdev)) { + otx2vf_stop(vf->netdev); + vf->reset_count++; + otx2vf_open(vf->netdev); + } + + rtnl_unlock(); +} + +static const struct net_device_ops otx2vf_netdev_ops = { + .ndo_open = otx2vf_open, + .ndo_stop = otx2vf_stop, + .ndo_start_xmit = otx2vf_xmit, + .ndo_set_mac_address = otx2_set_mac_address, + .ndo_change_mtu = otx2vf_change_mtu, + .ndo_get_stats64 = otx2_get_stats64, + .ndo_tx_timeout = otx2_tx_timeout, +}; + +static int otx2vf_realloc_msix_vectors(struct otx2_nic *vf) +{ + struct otx2_hw *hw = &vf->hw; + int num_vec, err; + + num_vec = hw->nix_msixoff; + num_vec += NIX_LF_CINT_VEC_START + hw->max_queues; + + otx2vf_disable_mbox_intr(vf); + pci_free_irq_vectors(hw->pdev); + err = pci_alloc_irq_vectors(hw->pdev, num_vec, num_vec, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(vf->dev, "%s: Failed to realloc %d IRQ vectors\n", + __func__, num_vec); + return err; + } + + return otx2vf_register_mbox_intr(vf, false); +} + +static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + int num_vec = pci_msix_vec_count(pdev); + struct device *dev = &pdev->dev; + struct net_device *netdev; + struct otx2_nic *vf; + struct otx2_hw *hw; + int err, qcount; + + err = pcim_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + return err; + } + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "DMA mask config failed, abort\n"); + goto err_release_regions; + } + + pci_set_master(pdev); + + qcount = num_online_cpus(); + netdev = alloc_etherdev_mqs(sizeof(*vf), qcount, qcount); + if (!netdev) { + err = -ENOMEM; + goto err_release_regions; + } + + pci_set_drvdata(pdev, netdev); + SET_NETDEV_DEV(netdev, &pdev->dev); + vf = netdev_priv(netdev); + vf->netdev = netdev; + vf->pdev = pdev; + vf->dev = dev; + vf->iommu_domain = iommu_get_domain_for_dev(dev); + + vf->flags |= OTX2_FLAG_INTF_DOWN; + hw = &vf->hw; + hw->pdev = vf->pdev; + hw->rx_queues = qcount; + hw->tx_queues = qcount; + hw->max_queues = qcount; + + hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE, + GFP_KERNEL); + if (!hw->irq_name) + goto err_free_netdev; + + hw->affinity_mask = devm_kcalloc(&hw->pdev->dev, num_vec, + sizeof(cpumask_var_t), GFP_KERNEL); + if (!hw->affinity_mask) + goto err_free_netdev; + + err = pci_alloc_irq_vectors(hw->pdev, num_vec, num_vec, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "%s: Failed to alloc %d IRQ vectors\n", + __func__, num_vec); + goto err_free_netdev; + } + + vf->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); + if (!vf->reg_base) { + dev_err(dev, "Unable to map physical function CSRs, aborting\n"); + err = -ENOMEM; + goto err_free_irq_vectors; + } + + /* Init VF <=> PF mailbox stuff */ + err = otx2vf_vfaf_mbox_init(vf); + if (err) + goto err_free_irq_vectors; + + /* Register mailbox interrupt */ + err = otx2vf_register_mbox_intr(vf, true); + if (err) + goto err_mbox_destroy; + + /* Request AF to attach NPA and LIX LFs to this AF */ + err = otx2_attach_npa_nix(vf); + if (err) + goto err_disable_mbox_intr; + + err = otx2vf_realloc_msix_vectors(vf); + if (err) + goto err_mbox_destroy; + + err = otx2_set_real_num_queues(netdev, qcount, qcount); + if (err) + goto err_detach_rsrc; + + otx2_setup_dev_hw_settings(vf); + + /* Assign default mac address */ + otx2_get_mac_from_af(netdev); + + netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | NETIF_F_RXHASH | + NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6; + netdev->features = netdev->hw_features; + + netdev->gso_max_segs = OTX2_MAX_GSO_SEGS; + netdev->watchdog_timeo = OTX2_TX_TIMEOUT; + + netdev->netdev_ops = &otx2vf_netdev_ops; + + /* MTU range: 68 - 9190 */ + netdev->min_mtu = OTX2_MIN_MTU; + netdev->max_mtu = OTX2_MAX_MTU; + + INIT_WORK(&vf->reset_task, otx2vf_reset_task); + + /* To distinguish, for LBK VFs set netdev name explicitly */ + if (is_otx2_lbkvf(vf->pdev)) { + int n; + + n = (vf->pcifunc >> RVU_PFVF_FUNC_SHIFT) & RVU_PFVF_FUNC_MASK; + /* Need to subtract 1 to get proper VF number */ + n -= 1; + snprintf(netdev->name, sizeof(netdev->name), "lbk%d", n); + } + + err = register_netdev(netdev); + if (err) { + dev_err(dev, "Failed to register netdevice\n"); + goto err_detach_rsrc; + } + + otx2vf_set_ethtool_ops(netdev); + + /* Enable pause frames by default */ + vf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED; + vf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED; + + return 0; + +err_detach_rsrc: + otx2_detach_resources(&vf->mbox); +err_disable_mbox_intr: + otx2vf_disable_mbox_intr(vf); +err_mbox_destroy: + otx2vf_vfaf_mbox_destroy(vf); +err_free_irq_vectors: + pci_free_irq_vectors(hw->pdev); +err_free_netdev: + pci_set_drvdata(pdev, NULL); + free_netdev(netdev); +err_release_regions: + pci_release_regions(pdev); + return err; +} + +static void otx2vf_remove(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct otx2_nic *vf; + + if (!netdev) + return; + + vf = netdev_priv(netdev); + + otx2vf_disable_mbox_intr(vf); + + otx2_detach_resources(&vf->mbox); + otx2vf_vfaf_mbox_destroy(vf); + pci_free_irq_vectors(vf->pdev); + pci_set_drvdata(pdev, NULL); + free_netdev(netdev); + + pci_release_regions(pdev); +} + +static struct pci_driver otx2vf_driver = { + .name = DRV_NAME, + .id_table = otx2_vf_id_table, + .probe = otx2vf_probe, + .remove = otx2vf_remove, + .shutdown = otx2vf_remove, +}; + +static int __init otx2vf_init_module(void) +{ + pr_info("%s: %s\n", DRV_NAME, DRV_STRING); + + return pci_register_driver(&otx2vf_driver); +} + +static void __exit otx2vf_cleanup_module(void) +{ + pci_unregister_driver(&otx2vf_driver); +} + +module_init(otx2vf_init_module); +module_exit(otx2vf_cleanup_module); diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 97f270d30cce..3c89206f18a7 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -876,6 +876,7 @@ static int skge_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom } static const struct ethtool_ops skge_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = skge_get_drvinfo, .get_regs_len = skge_get_regs_len, .get_regs = skge_get_regs, diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index ebfd0ceac884..241f00716979 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4400,6 +4400,10 @@ static int sky2_set_features(struct net_device *dev, netdev_features_t features) } static const struct ethtool_ops sky2_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_RX_USECS_IRQ | + ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ, .get_drvinfo = sky2_get_drvinfo, .get_wol = sky2_get_wol, .set_wol = sky2_set_wol, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index a1f20b205299..312e0a1ad43d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -78,6 +78,16 @@ config MLX5_ESWITCH Legacy SRIOV mode (L2 mac vlan steering based). Switchdev mode (eswitch offloads). +config MLX5_TC_CT + bool "MLX5 TC connection tracking offload support" + depends on MLX5_CORE_EN && NET_SWITCHDEV && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT + default y + help + Say Y here if you want to support offloading connection tracking rules + via tc ct action. + + If unsure, set to Y + config MLX5_CORE_EN_DCB bool "Data Center Bridging (DCB) Support" default y diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index f3dec6b41436..7408ae380d23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -34,9 +34,10 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ - lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ + lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ en/tc_tun_geneve.o diag/en_tc_tracepoint.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o +mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o # # Core extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index e38495e4aa42..f8b2de4b04be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -3,20 +3,14 @@ #include "en/devlink.h" -int mlx5e_devlink_port_register(struct net_device *netdev) +int mlx5e_devlink_port_register(struct mlx5e_priv *priv) { - struct mlx5_core_dev *dev; - struct mlx5e_priv *priv; - struct devlink *devlink; - int err; + struct devlink *devlink = priv_to_devlink(priv->mdev); - priv = netdev_priv(netdev); - dev = priv->mdev; - - if (mlx5_core_is_pf(dev)) + if (mlx5_core_is_pf(priv->mdev)) devlink_port_attrs_set(&priv->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, - PCI_FUNC(dev->pdev->devfn), + PCI_FUNC(priv->mdev->pdev->devfn), false, 0, NULL, 0); else @@ -24,12 +18,12 @@ int mlx5e_devlink_port_register(struct net_device *netdev) DEVLINK_PORT_FLAVOUR_VIRTUAL, 0, false, 0, NULL, 0); - devlink = priv_to_devlink(dev); - err = devlink_port_register(devlink, &priv->dl_port, 1); - if (err) - return err; - devlink_port_type_eth_set(&priv->dl_port, netdev); - return 0; + return devlink_port_register(devlink, &priv->dl_port, 1); +} + +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv) +{ + devlink_port_type_eth_set(&priv->dl_port, priv->netdev); } void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h index 3e5393a0901f..83123a801adc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h @@ -7,8 +7,9 @@ #include <net/devlink.h> #include "en.h" -int mlx5e_devlink_port_register(struct net_device *dev); +int mlx5e_devlink_port_register(struct mlx5e_priv *priv); void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv); +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv); struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c new file mode 100644 index 000000000000..ea321e528749 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include <linux/jhash.h> +#include <linux/slab.h> +#include <linux/xarray.h> +#include <linux/hashtable.h> + +#include "mapping.h" + +#define MAPPING_GRACE_PERIOD 2000 + +struct mapping_ctx { + struct xarray xarray; + DECLARE_HASHTABLE(ht, 8); + struct mutex lock; /* Guards hashtable and xarray */ + unsigned long max_id; + size_t data_size; + bool delayed_removal; + struct delayed_work dwork; + struct list_head pending_list; + spinlock_t pending_list_lock; /* Guards pending list */ +}; + +struct mapping_item { + struct rcu_head rcu; + struct list_head list; + unsigned long timeout; + struct hlist_node node; + int cnt; + u32 id; + char data[]; +}; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id) +{ + struct mapping_item *mi; + int err = -ENOMEM; + u32 hash_key; + + mutex_lock(&ctx->lock); + + hash_key = jhash(data, ctx->data_size, 0); + hash_for_each_possible(ctx->ht, mi, node, hash_key) { + if (!memcmp(data, mi->data, ctx->data_size)) + goto attach; + } + + mi = kzalloc(sizeof(*mi) + ctx->data_size, GFP_KERNEL); + if (!mi) + goto err_alloc; + + memcpy(mi->data, data, ctx->data_size); + hash_add(ctx->ht, &mi->node, hash_key); + + err = xa_alloc(&ctx->xarray, &mi->id, mi, XA_LIMIT(1, ctx->max_id), + GFP_KERNEL); + if (err) + goto err_assign; +attach: + ++mi->cnt; + *id = mi->id; + + mutex_unlock(&ctx->lock); + + return 0; + +err_assign: + hash_del(&mi->node); + kfree(mi); +err_alloc: + mutex_unlock(&ctx->lock); + + return err; +} + +static void mapping_remove_and_free(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + xa_erase(&ctx->xarray, mi->id); + kfree_rcu(mi, rcu); +} + +static void mapping_free_item(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + if (!ctx->delayed_removal) { + mapping_remove_and_free(ctx, mi); + return; + } + + mi->timeout = jiffies + msecs_to_jiffies(MAPPING_GRACE_PERIOD); + + spin_lock(&ctx->pending_list_lock); + list_add_tail(&mi->list, &ctx->pending_list); + spin_unlock(&ctx->pending_list_lock); + + schedule_delayed_work(&ctx->dwork, MAPPING_GRACE_PERIOD); +} + +int mapping_remove(struct mapping_ctx *ctx, u32 id) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + mutex_lock(&ctx->lock); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto out; + err = 0; + + if (--mi->cnt > 0) + goto out; + + hash_del(&mi->node); + mapping_free_item(ctx, mi); +out: + mutex_unlock(&ctx->lock); + + return err; +} + +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + rcu_read_lock(); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto err_find; + + memcpy(data, mi->data, ctx->data_size); + err = 0; + +err_find: + rcu_read_unlock(); + return err; +} + +static void +mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list) +{ + struct mapping_item *mi; + + list_for_each_entry(mi, list, list) + mapping_remove_and_free(ctx, mi); +} + +static void mapping_work_handler(struct work_struct *work) +{ + unsigned long min_timeout = 0, now = jiffies; + struct mapping_item *mi, *next; + LIST_HEAD(pending_items); + struct mapping_ctx *ctx; + + ctx = container_of(work, struct mapping_ctx, dwork.work); + + spin_lock(&ctx->pending_list_lock); + list_for_each_entry_safe(mi, next, &ctx->pending_list, list) { + if (time_after(now, mi->timeout)) + list_move(&mi->list, &pending_items); + else if (!min_timeout || + time_before(mi->timeout, min_timeout)) + min_timeout = mi->timeout; + } + spin_unlock(&ctx->pending_list_lock); + + mapping_remove_and_free_list(ctx, &pending_items); + + if (min_timeout) + schedule_delayed_work(&ctx->dwork, abs(min_timeout - now)); +} + +static void mapping_flush_work(struct mapping_ctx *ctx) +{ + if (!ctx->delayed_removal) + return; + + cancel_delayed_work_sync(&ctx->dwork); + mapping_remove_and_free_list(ctx, &ctx->pending_list); +} + +struct mapping_ctx * +mapping_create(size_t data_size, u32 max_id, bool delayed_removal) +{ + struct mapping_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->max_id = max_id ? max_id : UINT_MAX; + ctx->data_size = data_size; + + if (delayed_removal) { + INIT_DELAYED_WORK(&ctx->dwork, mapping_work_handler); + INIT_LIST_HEAD(&ctx->pending_list); + spin_lock_init(&ctx->pending_list_lock); + ctx->delayed_removal = true; + } + + mutex_init(&ctx->lock); + xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1); + + return ctx; +} + +void mapping_destroy(struct mapping_ctx *ctx) +{ + mapping_flush_work(ctx); + xa_destroy(&ctx->xarray); + mutex_destroy(&ctx->lock); + + kfree(ctx); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h new file mode 100644 index 000000000000..285525cc5470 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_MAPPING_H__ +#define __MLX5_MAPPING_H__ + +struct mapping_ctx; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id); +int mapping_remove(struct mapping_ctx *ctx, u32 id); +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data); + +/* mapping uses an xarray to map data to ids in add(), and for find(). + * For locking, it uses a internal xarray spin lock for add()/remove(), + * find() uses rcu_read_lock(). + * Choosing delayed_removal postpones the removal of a previously mapped + * id by MAPPING_GRACE_PERIOD milliseconds. + * This is to avoid races against hardware, where we mark the packet in + * hardware with a previous id, and quick remove() and add() reusing the same + * previous id. Then find() will get the new mapping instead of the old + * which was used to mark the packet. + */ +struct mapping_ctx *mapping_create(size_t data_size, u32 max_id, + bool delayed_removal); +void mapping_destroy(struct mapping_ctx *ctx); + +#endif /* __MLX5_MAPPING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c new file mode 100644 index 000000000000..a22ad6b90847 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -0,0 +1,1368 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_zones.h> +#include <net/netfilter/nf_conntrack_labels.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_acct.h> +#include <uapi/linux/tc_act/tc_pedit.h> +#include <net/tc_act/tc_ct.h> +#include <net/flow_offload.h> +#include <net/netfilter/nf_flow_table.h> +#include <linux/workqueue.h> + +#include "en/tc_ct.h" +#include "en.h" +#include "en_tc.h" +#include "en_rep.h" +#include "eswitch_offloads_chains.h" + +#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8) +#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) +#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) +#define MLX5_CT_STATE_TRK_BIT BIT(2) + +#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) +#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) +#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX + +#define ct_dbg(fmt, args...)\ + netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) + +struct mlx5_tc_ct_priv { + struct mlx5_eswitch *esw; + const struct net_device *netdev; + struct idr fte_ids; + struct idr tuple_ids; + struct rhashtable zone_ht; + struct mlx5_flow_table *ct; + struct mlx5_flow_table *ct_nat; + struct mlx5_flow_table *post_ct; + struct mutex control_lock; /* guards parallel adds/dels */ +}; + +struct mlx5_ct_flow { + struct mlx5_esw_flow_attr pre_ct_attr; + struct mlx5_esw_flow_attr post_ct_attr; + struct mlx5_flow_handle *pre_ct_rule; + struct mlx5_flow_handle *post_ct_rule; + struct mlx5_ct_ft *ft; + u32 fte_id; + u32 chain_mapping; +}; + +struct mlx5_ct_zone_rule { + struct mlx5_flow_handle *rule; + struct mlx5_esw_flow_attr attr; + int tupleid; + bool nat; +}; + +struct mlx5_ct_ft { + struct rhash_head node; + u16 zone; + refcount_t refcount; + struct nf_flowtable *nf_ft; + struct mlx5_tc_ct_priv *ct_priv; + struct rhashtable ct_entries_ht; + struct list_head ct_entries_list; +}; + +struct mlx5_ct_entry { + struct list_head list; + u16 zone; + struct rhash_head node; + struct flow_rule *flow_rule; + struct mlx5_fc *counter; + unsigned long lastuse; + unsigned long cookie; + unsigned long restore_cookie; + struct mlx5_ct_zone_rule zone_rules[2]; +}; + +static const struct rhashtable_params cts_ht_params = { + .head_offset = offsetof(struct mlx5_ct_entry, node), + .key_offset = offsetof(struct mlx5_ct_entry, cookie), + .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie), + .automatic_shrinking = true, + .min_size = 16 * 1024, +}; + +static const struct rhashtable_params zone_params = { + .head_offset = offsetof(struct mlx5_ct_ft, node), + .key_offset = offsetof(struct mlx5_ct_ft, zone), + .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone), + .automatic_shrinking = true, +}; + +static struct mlx5_tc_ct_priv * +mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + return uplink_priv->ct_priv; +} + +static int +mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec, + struct flow_rule *rule) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + u16 addr_type = 0; + u8 ip_proto = 0; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(match.mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(match.key->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + match.mask->ip_proto); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + match.key->ip_proto); + + ip_proto = match.key->ip_proto; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.key->src, sizeof(match.key->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.key->dst, sizeof(match.key->dst)); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, sizeof(match.key->src)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, sizeof(match.key->dst)); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + switch (ip_proto) { + case IPPROTO_TCP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_dport, ntohs(match.key->dst)); + break; + + case IPPROTO_UDP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(match.key->dst)); + break; + default: + break; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + + flow_rule_match_tcp(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(match.mask->flags)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, + ntohs(match.key->flags)); + } + + return 0; +} + +static void +mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry, + bool nat) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_esw_flow_attr *attr = &zone_rule->attr; + struct mlx5_eswitch *esw = ct_priv->esw; + + ct_dbg("Deleting ct entry rule in zone %d", entry->zone); + + mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr); + mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); + idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid); +} + +static void +mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry) +{ + mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); + + mlx5_fc_destroy(ct_priv->esw->dev, entry->counter); +} + +static struct flow_action_entry * +mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) +{ + struct flow_action *flow_action = &flow_rule->action; + struct flow_action_entry *act; + int i; + + flow_action_for_each(i, act, flow_action) { + if (act->id == FLOW_ACTION_CT_METADATA) + return act; + } + + return NULL; +} + +static int +mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + u8 ct_state, + u32 mark, + u32 label, + u32 tupleid) +{ + struct mlx5_eswitch *esw = ct_priv->esw; + int err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + CTSTATE_TO_REG, ct_state); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + MARK_TO_REG, mark); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + LABELS_TO_REG, label); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + TUPLEID_TO_REG, tupleid); + if (err) + return err; + + return 0; +} + +static int +mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, + char *modact) +{ + u32 offset = act->mangle.offset, field; + + switch (act->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct iphdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV4; + else if (offset == offsetof(struct iphdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV4; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct ipv6hdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, saddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, saddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, saddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; + else if (offset == offsetof(struct ipv6hdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, daddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, daddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, daddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct tcphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT; + else if (offset == offsetof(struct tcphdr, dest)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct udphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT; + else if (offset == offsetof(struct udphdr, dest)) + field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT; + else + return -EOPNOTSUPP; + break; + + default: + return -EOPNOTSUPP; + } + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, offset, 0); + MLX5_SET(set_action_in, modact, field, field); + MLX5_SET(set_action_in, modact, data, act->mangle.val); + + return 0; +} + +static int +mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + struct flow_action *flow_action = &flow_rule->action; + struct mlx5_core_dev *mdev = ct_priv->esw->dev; + struct flow_action_entry *act; + size_t action_size; + char *modact; + int err, i; + + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_MANGLE: { + err = alloc_mod_hdr_actions(mdev, + MLX5_FLOW_NAMESPACE_FDB, + mod_acts); + if (err) + return err; + + modact = mod_acts->actions + + mod_acts->num_actions * action_size; + + err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); + if (err) + return err; + + mod_acts->num_actions++; + } + break; + + case FLOW_ACTION_CT_METADATA: + /* Handled earlier */ + continue; + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int +mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_esw_flow_attr *attr, + struct flow_rule *flow_rule, + u32 tupleid, + bool nat) +{ + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_modify_hdr *mod_hdr; + struct flow_action_entry *meta; + int err; + + meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta) + return -EOPNOTSUPP; + + if (meta->ct_metadata.labels[1] || + meta->ct_metadata.labels[2] || + meta->ct_metadata.labels[3]) { + ct_dbg("Failed to offload ct entry due to unsupported label"); + return -EOPNOTSUPP; + } + + if (nat) { + err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, + &mod_acts); + if (err) + goto err_mapping; + } + + err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, + (MLX5_CT_STATE_ESTABLISHED_BIT | + MLX5_CT_STATE_TRK_BIT), + meta->ct_metadata.mark, + meta->ct_metadata.labels[0], + tupleid); + if (err) + goto err_mapping; + + mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + mod_acts.num_actions, + mod_acts.actions); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mapping; + } + attr->modify_hdr = mod_hdr; + + dealloc_mod_hdr_actions(&mod_acts); + return 0; + +err_mapping: + dealloc_mod_hdr_actions(&mod_acts); + return err; +} + +static int +mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + bool nat) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_esw_flow_attr *attr = &zone_rule->attr; + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_flow_spec *spec = NULL; + u32 tupleid = 1; + int err; + + zone_rule->nat = nat; + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Get tuple unique id */ + err = idr_alloc_u32(&ct_priv->tuple_ids, zone_rule, &tupleid, + TUPLE_ID_MAX, GFP_KERNEL); + if (err) { + netdev_warn(ct_priv->netdev, + "Failed to allocate tuple id, err: %d\n", err); + goto err_idr_alloc; + } + zone_rule->tupleid = tupleid; + + err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, + tupleid, nat); + if (err) { + ct_dbg("Failed to create ct entry mod hdr"); + goto err_mod_hdr; + } + + attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = 0; + attr->dest_ft = ct_priv->post_ct; + attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct; + attr->outer_match_level = MLX5_MATCH_L4; + attr->counter = entry->counter; + attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + + mlx5_tc_ct_set_tuple_match(spec, flow_rule); + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + entry->zone & MLX5_CT_ZONE_MASK, + MLX5_CT_ZONE_MASK); + + zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + if (IS_ERR(zone_rule->rule)) { + err = PTR_ERR(zone_rule->rule); + ct_dbg("Failed to add ct entry rule, nat: %d", nat); + goto err_rule; + } + + kfree(spec); + ct_dbg("Offloaded ct entry rule in zone %d", entry->zone); + + return 0; + +err_rule: + mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); +err_mod_hdr: + idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid); +err_idr_alloc: + kfree(spec); + return err; +} + +static int +mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry) +{ + struct mlx5_eswitch *esw = ct_priv->esw; + int err; + + entry->counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(entry->counter)) { + err = PTR_ERR(entry->counter); + ct_dbg("Failed to create counter for ct entry"); + return err; + } + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false); + if (err) + goto err_orig; + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true); + if (err) + goto err_nat; + + return 0; + +err_nat: + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); +err_orig: + mlx5_fc_destroy(esw->dev, entry->counter); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + struct flow_action_entry *meta_action; + unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + int err; + + meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta_action) + return -EOPNOTSUPP; + + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, + cts_ht_params); + if (entry) + return 0; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->zone = ft->zone; + entry->flow_rule = flow_rule; + entry->cookie = flow->cookie; + entry->restore_cookie = meta_action->ct_metadata.cookie; + + err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry); + if (err) + goto err_rules; + + err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node, + cts_ht_params); + if (err) + goto err_insert; + + list_add(&entry->list, &ft->ct_entries_list); + + return 0; + +err_insert: + mlx5_tc_ct_entry_del_rules(ct_priv, entry); +err_rules: + kfree(entry); + netdev_warn(ct_priv->netdev, + "Failed to offload ct entry, err: %d\n", err); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, + cts_ht_params); + if (!entry) + return -ENOENT; + + mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry); + WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, + &entry->node, + cts_ht_params)); + list_del(&entry->list); + kfree(entry); + + return 0; +} + +static int +mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, + struct flow_cls_offload *f) +{ + unsigned long cookie = f->cookie; + struct mlx5_ct_entry *entry; + u64 lastuse, packets, bytes; + + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, + cts_ht_params); + if (!entry) + return -ENOENT; + + mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse); + flow_stats_update(&f->stats, bytes, packets, lastuse); + + return 0; +} + +static int +mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload *f = type_data; + struct mlx5_ct_ft *ft = cb_priv; + + if (type != TC_SETUP_CLSFLOWER) + return -EOPNOTSUPP; + + switch (f->command) { + case FLOW_CLS_REPLACE: + return mlx5_tc_ct_block_flow_offload_add(ft, f); + case FLOW_CLS_DESTROY: + return mlx5_tc_ct_block_flow_offload_del(ft, f); + case FLOW_CLS_STATS: + return mlx5_tc_ct_block_flow_offload_stats(ft, f); + default: + break; + }; + + return -EOPNOTSUPP; +} + +int +mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct netlink_ext_ack *extack) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct flow_dissector_key_ct *mask, *key; + bool trk, est, untrk, unest, new; + u32 ctstate = 0, ctstate_mask = 0; + u16 ct_state_on, ct_state_off; + u16 ct_state, ct_state_mask; + struct flow_match_ct match; + + if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT)) + return 0; + + if (!ct_priv) { + NL_SET_ERR_MSG_MOD(extack, + "offload of ct matching isn't available"); + return -EOPNOTSUPP; + } + + flow_rule_match_ct(f->rule, &match); + + key = match.key; + mask = match.mask; + + ct_state = key->ct_state; + ct_state_mask = mask->ct_state; + + if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | + TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | + TCA_FLOWER_KEY_CT_FLAGS_NEW)) { + NL_SET_ERR_MSG_MOD(extack, + "only ct_state trk, est and new are supported for offload"); + return -EOPNOTSUPP; + } + + if (mask->ct_labels[1] || mask->ct_labels[2] || mask->ct_labels[3]) { + NL_SET_ERR_MSG_MOD(extack, + "only lower 32bits of ct_labels are supported for offload"); + return -EOPNOTSUPP; + } + + ct_state_on = ct_state & ct_state_mask; + ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask; + trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; + est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + + ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + + if (new) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +new isn't supported"); + return -EOPNOTSUPP; + } + + if (mask->ct_zone) + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + key->ct_zone, MLX5_CT_ZONE_MASK); + if (ctstate_mask) + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, + ctstate, ctstate_mask); + if (mask->ct_mark) + mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG, + key->ct_mark, mask->ct_mark); + if (mask->ct_labels[0]) + mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, + key->ct_labels[0], + mask->ct_labels[0]); + + return 0; +} + +int +mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + + if (!ct_priv) { + NL_SET_ERR_MSG_MOD(extack, + "offload of ct action isn't available"); + return -EOPNOTSUPP; + } + + attr->ct_attr.zone = act->ct.zone; + attr->ct_attr.ct_action = act->ct.action; + attr->ct_attr.nf_ft = act->ct.flow_table; + + return 0; +} + +static struct mlx5_ct_ft * +mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, + struct nf_flowtable *nf_ft) +{ + struct mlx5_ct_ft *ft; + int err; + + ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params); + if (ft) { + refcount_inc(&ft->refcount); + return ft; + } + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + if (!ft) + return ERR_PTR(-ENOMEM); + + ft->zone = zone; + ft->nf_ft = nf_ft; + ft->ct_priv = ct_priv; + INIT_LIST_HEAD(&ft->ct_entries_list); + refcount_set(&ft->refcount, 1); + + err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); + if (err) + goto err_init; + + err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, + zone_params); + if (err) + goto err_insert; + + err = nf_flow_table_offload_add_cb(ft->nf_ft, + mlx5_tc_ct_block_flow_offload, ft); + if (err) + goto err_add_cb; + + return ft; + +err_add_cb: + rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); +err_insert: + rhashtable_destroy(&ft->ct_entries_ht); +err_init: + kfree(ft); + return ERR_PTR(err); +} + +static void +mlx5_tc_ct_flush_ft(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) +{ + struct mlx5_ct_entry *entry; + + list_for_each_entry(entry, &ft->ct_entries_list, list) + mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry); +} + +static void +mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) +{ + if (!refcount_dec_and_test(&ft->refcount)) + return; + + nf_flow_table_offload_del_cb(ft->nf_ft, + mlx5_tc_ct_block_flow_offload, ft); + mlx5_tc_ct_flush_ft(ct_priv, ft); + rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); + rhashtable_destroy(&ft->ct_entries_ht); + kfree(ft); +} + +/* We translate the tc filter with CT action to the following HW model: + * + * +-------------------+ +--------------------+ +--------------+ + * + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct +-----> + * + original match + | + tuple + zone match + | + fte_id match + | + * +-------------------+ | +--------------------+ | +--------------+ | + * v v v + * set chain miss mapping set mark original + * set fte_id set label filter + * set zone set established actions + * set tunnel_id do nat (if needed) + * do decap + */ +static int +__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *orig_spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_handle **flow_rule) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; + struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5_flow_spec *post_ct_spec = NULL; + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_esw_flow_attr *pre_ct_attr; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_flow_handle *rule; + struct mlx5_ct_flow *ct_flow; + int chain_mapping = 0, err; + struct mlx5_ct_ft *ft; + u32 fte_id = 1; + + post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); + ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); + if (!post_ct_spec || !ct_flow) { + kfree(post_ct_spec); + kfree(ct_flow); + return -ENOMEM; + } + + /* Register for CT established events */ + ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, + attr->ct_attr.nf_ft); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + ct_dbg("Failed to register to ft callback"); + goto err_ft; + } + ct_flow->ft = ft; + + err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id, + MLX5_FTE_ID_MAX, GFP_KERNEL); + if (err) { + netdev_warn(priv->netdev, + "Failed to allocate fte id, err: %d\n", err); + goto err_idr; + } + ct_flow->fte_id = fte_id; + + /* Base esw attributes of both rules on original rule attribute */ + pre_ct_attr = &ct_flow->pre_ct_attr; + memcpy(pre_ct_attr, attr, sizeof(*attr)); + memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr)); + + /* Modify the original rule's action to fwd and modify, leave decap */ + pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; + pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + /* Write chain miss tag for miss in ct table as we + * don't go though all prios of this chain as normal tc rules + * miss. + */ + err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain, + &chain_mapping); + if (err) { + ct_dbg("Failed to get chain register mapping for chain"); + goto err_get_chain; + } + ct_flow->chain_mapping = chain_mapping; + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + CHAIN_TO_REG, chain_mapping); + if (err) { + ct_dbg("Failed to set chain register mapping"); + goto err_mapping; + } + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG, + attr->ct_attr.zone & + MLX5_CT_ZONE_MASK); + if (err) { + ct_dbg("Failed to set zone register mapping"); + goto err_mapping; + } + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + FTEID_TO_REG, fte_id); + if (err) { + ct_dbg("Failed to set fte_id register mapping"); + goto err_mapping; + } + + /* If original flow is decap, we do it before going into ct table + * so add a rewrite for the tunnel match_id. + */ + if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && + attr->chain == 0) { + u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + TUNNEL_TO_REG, + tun_id); + if (err) { + ct_dbg("Failed to set tunnel register mapping"); + goto err_mapping; + } + } + + mod_hdr = mlx5_modify_header_alloc(esw->dev, + MLX5_FLOW_NAMESPACE_FDB, + pre_mod_acts.num_actions, + pre_mod_acts.actions); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + ct_dbg("Failed to create pre ct mod hdr"); + goto err_mapping; + } + pre_ct_attr->modify_hdr = mod_hdr; + + /* Post ct rule matches on fte_id and executes original rule's + * tc rule action + */ + mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG, + fte_id, MLX5_FTE_ID_MASK); + + /* Put post_ct rule on post_ct fdb */ + ct_flow->post_ct_attr.chain = 0; + ct_flow->post_ct_attr.prio = 0; + ct_flow->post_ct_attr.fdb = ct_priv->post_ct; + + ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE; + ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE; + ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); + rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec, + &ct_flow->post_ct_attr); + ct_flow->post_ct_rule = rule; + if (IS_ERR(ct_flow->post_ct_rule)) { + err = PTR_ERR(ct_flow->post_ct_rule); + ct_dbg("Failed to add post ct rule"); + goto err_insert_post_ct; + } + + /* Change original rule point to ct table */ + pre_ct_attr->dest_chain = 0; + pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct; + ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw, + orig_spec, + pre_ct_attr); + if (IS_ERR(ct_flow->pre_ct_rule)) { + err = PTR_ERR(ct_flow->pre_ct_rule); + ct_dbg("Failed to add pre ct rule"); + goto err_insert_orig; + } + + attr->ct_attr.ct_flow = ct_flow; + *flow_rule = ct_flow->post_ct_rule; + dealloc_mod_hdr_actions(&pre_mod_acts); + kfree(post_ct_spec); + + return 0; + +err_insert_orig: + mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule, + &ct_flow->post_ct_attr); +err_insert_post_ct: + mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); +err_mapping: + dealloc_mod_hdr_actions(&pre_mod_acts); + mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); +err_get_chain: + idr_remove(&ct_priv->fte_ids, fte_id); +err_idr: + mlx5_tc_ct_del_ft_cb(ct_priv, ft); +err_ft: + kfree(post_ct_spec); + kfree(ct_flow); + netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); + return err; +} + +static int +__mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *orig_spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + struct mlx5_flow_handle **flow_rule) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_esw_flow_attr *pre_ct_attr; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_flow_handle *rule; + struct mlx5_ct_flow *ct_flow; + int err; + + ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); + if (!ct_flow) + return -ENOMEM; + + /* Base esw attributes on original rule attribute */ + pre_ct_attr = &ct_flow->pre_ct_attr; + memcpy(pre_ct_attr, attr, sizeof(*attr)); + + err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0); + if (err) { + ct_dbg("Failed to set register for ct clear"); + goto err_set_registers; + } + + mod_hdr = mlx5_modify_header_alloc(esw->dev, + MLX5_FLOW_NAMESPACE_FDB, + mod_acts->num_actions, + mod_acts->actions); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + ct_dbg("Failed to add create ct clear mod hdr"); + goto err_set_registers; + } + + dealloc_mod_hdr_actions(mod_acts); + pre_ct_attr->modify_hdr = mod_hdr; + pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add ct clear rule"); + goto err_insert; + } + + attr->ct_attr.ct_flow = ct_flow; + ct_flow->pre_ct_rule = rule; + *flow_rule = rule; + + return 0; + +err_insert: + mlx5_modify_header_dealloc(priv->mdev, mod_hdr); +err_set_registers: + netdev_warn(priv->netdev, + "Failed to offload ct clear flow, err %d\n", err); + return err; +} + +struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct mlx5_flow_handle *rule; + int err; + + if (!ct_priv) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&ct_priv->control_lock); + if (clear_action) + err = __mlx5_tc_ct_flow_offload_clear(priv, flow, spec, attr, + mod_hdr_acts, &rule); + else + err = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr, + &rule); + mutex_unlock(&ct_priv->control_lock); + if (err) + return ERR_PTR(err); + + return rule; +} + +static void +__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_flow *ct_flow) +{ + struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr; + struct mlx5_eswitch *esw = ct_priv->esw; + + mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule, + pre_ct_attr); + mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr); + + if (ct_flow->post_ct_rule) { + mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule, + &ct_flow->post_ct_attr); + mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); + idr_remove(&ct_priv->fte_ids, ct_flow->fte_id); + mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); + } + + kfree(ct_flow); +} + +void +mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; + + /* We are called on error to clean up stuff from parsing + * but we don't have anything for now + */ + if (!ct_flow) + return; + + mutex_lock(&ct_priv->control_lock); + __mlx5_tc_ct_delete_flow(ct_priv, ct_flow); + mutex_unlock(&ct_priv->control_lock); +} + +static int +mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw, + const char **err_msg) +{ +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + /* cannot restore chain ID on HW miss */ + + *err_msg = "tc skb extension missing"; + return -EOPNOTSUPP; +#endif + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) { + *err_msg = "firmware level support is missing"; + return -EOPNOTSUPP; + } + + if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { + /* vlan workaround should be avoided for multi chain rules. + * This is just a sanity check as pop vlan action should + * be supported by any FW that supports ignore_flow_level + */ + + *err_msg = "firmware vlan actions support is missing"; + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, + fdb_modify_header_fwd_to_table)) { + /* CT always writes to registers which are mod header actions. + * Therefore, mod header and goto is required + */ + + *err_msg = "firmware fwd and modify support is missing"; + return -EOPNOTSUPP; + } + + if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + *err_msg = "register loopback isn't supported"; + return -EOPNOTSUPP; + } + + return 0; +} + +static void +mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err) +{ + if (msg) + netdev_warn(rpriv->netdev, + "tc ct offload not supported, %s, err: %d\n", + msg, err); + else + netdev_warn(rpriv->netdev, + "tc ct offload not supported, err: %d\n", + err); +} + +int +mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) +{ + struct mlx5_tc_ct_priv *ct_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + const char *msg; + int err; + + rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; + + err = mlx5_tc_ct_init_check_support(esw, &msg); + if (err) { + mlx5_tc_ct_init_err(rpriv, msg, err); + goto err_support; + } + + ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); + if (!ct_priv) { + mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM); + goto err_alloc; + } + + ct_priv->esw = esw; + ct_priv->netdev = rpriv->netdev; + ct_priv->ct = mlx5_esw_chains_create_global_table(esw); + if (IS_ERR(ct_priv->ct)) { + err = PTR_ERR(ct_priv->ct); + mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err); + goto err_ct_tbl; + } + + ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw); + if (IS_ERR(ct_priv->ct_nat)) { + err = PTR_ERR(ct_priv->ct_nat); + mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table", + err); + goto err_ct_nat_tbl; + } + + ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw); + if (IS_ERR(ct_priv->post_ct)) { + err = PTR_ERR(ct_priv->post_ct); + mlx5_tc_ct_init_err(rpriv, "failed to create post ct table", + err); + goto err_post_ct_tbl; + } + + idr_init(&ct_priv->fte_ids); + idr_init(&ct_priv->tuple_ids); + mutex_init(&ct_priv->control_lock); + rhashtable_init(&ct_priv->zone_ht, &zone_params); + + /* Done, set ct_priv to know it initializted */ + uplink_priv->ct_priv = ct_priv; + + return 0; + +err_post_ct_tbl: + mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat); +err_ct_nat_tbl: + mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct); +err_ct_tbl: + kfree(ct_priv); +err_alloc: +err_support: + + return 0; +} + +void +mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) +{ + struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; + + if (!ct_priv) + return; + + mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct); + mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat); + mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct); + + rhashtable_destroy(&ct_priv->zone_ht); + mutex_destroy(&ct_priv->control_lock); + idr_destroy(&ct_priv->tuple_ids); + idr_destroy(&ct_priv->fte_ids); + kfree(ct_priv); + + uplink_priv->ct_priv = NULL; +} + +bool +mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, + struct sk_buff *skb, u32 tupleid) +{ + struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; + struct mlx5_ct_zone_rule *zone_rule; + struct mlx5_ct_entry *entry; + + if (!ct_priv || !tupleid) + return true; + + zone_rule = idr_find(&ct_priv->tuple_ids, tupleid); + if (!zone_rule) + return false; + + entry = container_of(zone_rule, struct mlx5_ct_entry, + zone_rules[zone_rule->nat]); + tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); + + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h new file mode 100644 index 000000000000..091d305b633e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_CT_H__ +#define __MLX5_EN_TC_CT_H__ + +#include <net/pkt_cls.h> +#include <linux/mlx5/fs.h> +#include <net/tc_act/tc_ct.h> + +#include "en.h" + +struct mlx5_esw_flow_attr; +struct mlx5e_tc_mod_hdr_acts; +struct mlx5_rep_uplink_priv; +struct mlx5e_tc_flow; +struct mlx5e_priv; + +struct mlx5_ct_flow; + +struct nf_flowtable; + +struct mlx5_ct_attr { + u16 zone; + u16 ct_action; + struct mlx5_ct_flow *ct_flow; + struct nf_flowtable *nf_ft; +}; + +#define zone_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 0,\ + .mlen = 2,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2) + 2,\ +} + +#define ctstate_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 2,\ + .mlen = 2,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2),\ +} + +#define mark_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_3,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_3),\ +} + +#define labels_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_4,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_4),\ +} + +#define fteid_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_5),\ +} + +#define tupleid_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\ + .moffset = 0,\ + .mlen = 3,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_1),\ +} + +#define TUPLE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[TUPLEID_TO_REG].mlen * 8) +#define TUPLE_ID_MAX GENMASK(TUPLE_ID_BITS - 1, 0) + +#if IS_ENABLED(CONFIG_MLX5_TC_CT) + +int +mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv); +void +mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv); + +int +mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct netlink_ext_ack *extack); +int +mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack); + +struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void +mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr); + +bool +mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, + struct sk_buff *skb, u32 tupleid); + +#else /* CONFIG_MLX5_TC_CT */ + +static inline int +mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) +{ + return 0; +} + +static inline void +mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) +{ +} + +static inline int +mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct netlink_ext_ack *extack) +{ + if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT)) + return 0; + + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n"); + return -EOPNOTSUPP; +} + +static inline int +mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n"); + return -EOPNOTSUPP; +} + +static inline struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr) +{ +} + +static inline bool +mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, + struct sk_buff *skb, u32 tupleid) +{ + if (!tupleid) + return true; + + return false; +} + +#endif /* !IS_ENABLED(CONFIG_MLX5_TC_CT) */ +#endif /* __MLX5_EN_TC_CT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index af4ebd2951b5..b45c3f46570b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -66,6 +66,9 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) return -EOPNOTSUPP; + if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev) + return -EOPNOTSUPP; + return 0; } @@ -469,10 +472,15 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, - void *headers_c, - void *headers_v, u8 *match_level) + u8 *match_level) { struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + struct netlink_ext_ack *extack = f->common.extack; int err = 0; if (!tunnel) { @@ -499,6 +507,109 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, goto out; } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_control match; + u16 addr_type; + + flow_rule_match_enc_control(rule, &match); + addr_type = match.key->addr_type; + + /* For tunnel addr_type used same key id`s as for non-tunnel */ + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.key->dst)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ETH_P_IP); + } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ETH_P_IPV6); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); + + if (match.mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB + (priv->mdev, + ft_field_support.outer_ipv4_ttl)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL is not supported"); + err = -EOPNOTSUPP; + goto out; + } + } + + /* Enforce DMAC when offloading incoming tunneled flows. + * Flow counters require a match on the DMAC. + */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), priv->netdev->dev_addr); + + /* let software handle IP fragments */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + + return 0; + out: return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 6f9a78c85ffd..1630f0ec3ad7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -76,8 +76,7 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, - void *headers_c, - void *headers_v, u8 *match_level); + u8 *match_level); int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 2c75b2752f58..014639ea06e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -175,28 +175,20 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, struct mlx5e_tir *tir = priv->indir_tir; struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); - struct mlx5_flow_spec *spec; enum mlx5e_traffic_types tt; int err = 0; - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - err = -ENOMEM; - goto out; - } - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; tt = arfs_get_tt(type); if (tt == -EINVAL) { netdev_err(priv->netdev, "%s: bad arfs_type: %d\n", __func__, type); - err = -EINVAL; - goto out; + return -EINVAL; } dest.tir_num = tir[tt].tirn; - arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec, + arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL, &flow_act, &dest, 1); if (IS_ERR(arfs_t->default_rule)) { @@ -205,8 +197,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n", __func__, type); } -out: - kvfree(spec); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f9c928afec89..be20d2247594 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5467,25 +5467,27 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) goto err_destroy_netdev; } - err = register_netdev(netdev); + err = mlx5e_devlink_port_register(priv); if (err) { - mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); goto err_detach; } - err = mlx5e_devlink_port_register(netdev); + err = register_netdev(netdev); if (err) { - mlx5_core_err(mdev, "mlx5e_devlink_phy_port_register failed, %d\n", err); - goto err_unregister_netdev; + mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + goto err_devlink_port_unregister; } + mlx5e_devlink_port_type_eth_set(priv); + #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_init_app(priv); #endif return priv; -err_unregister_netdev: - unregister_netdev(netdev); +err_devlink_port_unregister: + mlx5e_devlink_port_unregister(priv); err_detach: mlx5e_detach(mdev, priv); err_destroy_netdev: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 86f2c0bb9507..a33d15156ed5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1607,6 +1607,7 @@ static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv) } ft_attr.max_fte = 0; /* Empty table, miss rule will always point to next table */ + ft_attr.prio = 1; ft_attr.level = 1; rpriv->root_ft = mlx5_create_flow_table(ns, &ft_attr); @@ -1952,7 +1953,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .update_rx = mlx5e_update_rep_rx, .update_stats = mlx5e_update_ndo_stats, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep, .max_tc = 1, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_rep_stats_grps, @@ -1972,7 +1973,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .update_stats = mlx5e_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep, .max_tc = MLX5E_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_ul_rep_stats_grps, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 3d9c72eee9fa..6a2337900420 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -55,6 +55,7 @@ struct mlx5e_neigh_update_table { unsigned long min_interval; /* jiffies */ }; +struct mlx5_tc_ct_priv; struct mlx5_rep_uplink_priv { /* Filters DB - instantiated by the uplink representor and shared by * the uplink's VFs @@ -81,6 +82,13 @@ struct mlx5_rep_uplink_priv { struct mutex unready_flows_lock; struct list_head unready_flows; struct work_struct reoffload_flows_work; + + /* maps tun_info to a unique id*/ + struct mapping_ctx *tunnel_mapping; + /* maps tun_enc_opts to a unique id*/ + struct mapping_ctx *tunnel_enc_opts_mapping; + + struct mlx5_tc_ct_priv *ct_priv; }; struct mlx5e_rep_priv { @@ -192,6 +200,8 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe); int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 065c74a2d0c5..57b24946fb74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1195,6 +1195,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5e_tc_update_priv tc_priv = {}; struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; struct sk_buff *skb; @@ -1227,13 +1228,78 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (rep->vlan && skb_vlan_tag_present(skb)) skb_vlan_pop(skb); + if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv)) + goto free_wqe; + napi_gro_receive(rq->cq.napi, skb); + mlx5_tc_rep_post_napi_receive(&tc_priv); + free_wqe: mlx5e_free_rx_wqe(rq, wi, true); wq_cyc_pop: mlx5_wq_cyc_pop(wq); } + +void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; + u32 head_offset = wqe_offset & (PAGE_SIZE - 1); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + struct mlx5e_tc_update_priv tc_priv = {}; + struct mlx5e_rx_wqe_ll *wqe; + struct mlx5_wq_ll *wq; + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + trigger_report(rq, cqe); + rq->stats->wqe_err++; + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + struct mlx5e_rq_stats *stats = rq->stats; + + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + rq, wi, cqe_bcnt, head_offset, page_idx); + if (!skb) + goto mpwrq_cqe_out; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv)) + goto mpwrq_cqe_out; + + napi_gro_receive(rq->cq.napi, skb); + + mlx5_tc_rep_post_napi_receive(&tc_priv); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) + return; + + wq = &rq->mpwqe.wq; + wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); + mlx5e_free_rx_mpwqe(rq, wi, true); + mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} #endif struct sk_buff * diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f285713def77..901f88a886c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -55,10 +55,14 @@ #include "fs_core.h" #include "en/port.h" #include "en/tc_tun.h" +#include "en/mapping.h" +#include "en/tc_ct.h" #include "lib/devcom.h" #include "lib/geneve.h" #include "diag/en_tc_tracepoint.h" +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) + struct mlx5_nic_flow_attr { u32 action; u32 flow_tag; @@ -84,6 +88,7 @@ enum { MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, + MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, }; #define MLX5E_TC_MAX_SPLITS 1 @@ -134,6 +139,8 @@ struct mlx5e_tc_flow { refcount_t refcnt; struct rcu_head rcu_head; struct completion init_done; + int tunnel_id; /* the mapped tunnel id of this flow */ + union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -144,15 +151,118 @@ struct mlx5e_tc_flow_parse_attr { const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; - int num_mod_hdr_actions; - int max_mod_hdr_actions; - void *mod_hdr_actions; + struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) +struct tunnel_match_key { + struct flow_dissector_key_control enc_control; + struct flow_dissector_key_keyid enc_key_id; + struct flow_dissector_key_ports enc_tp; + struct flow_dissector_key_ip enc_ip; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; + + int filter_ifindex; +}; + +/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS. + * Upper TUNNEL_INFO_BITS for general tunnel info. + * Lower ENC_OPTS_BITS bits for enc_opts. + */ +#define TUNNEL_INFO_BITS 6 +#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) +#define ENC_OPTS_BITS 2 +#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) +#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) +#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) + +struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { + [CHAIN_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, + .moffset = 0, + .mlen = 2, + }, + [TUNNEL_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, + .moffset = 3, + .mlen = 1, + .soffset = MLX5_BYTE_OFF(fte_match_param, + misc_parameters_2.metadata_reg_c_1), + }, + [ZONE_TO_REG] = zone_to_reg_ct, + [CTSTATE_TO_REG] = ctstate_to_reg_ct, + [MARK_TO_REG] = mark_to_reg_ct, + [LABELS_TO_REG] = labels_to_reg_ct, + [FTEID_TO_REG] = fteid_to_reg_ct, + [TUPLEID_TO_REG] = tupleid_to_reg_ct, +}; + +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); + +void +mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 data, + u32 mask) +{ + int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; + void *headers_c = spec->match_criteria; + void *headers_v = spec->match_value; + void *fmask, *fval; + + fmask = headers_c + soffset; + fval = headers_v + soffset; + + mask = cpu_to_be32(mask) >> (32 - (match_len * 8)); + data = cpu_to_be32(data) >> (32 - (match_len * 8)); + + memcpy(fmask, &mask, match_len); + memcpy(fval, &data, match_len); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; +} + +int +mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + u32 data) +{ + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; + int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; + char *modact; + int err; + + err = alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB, + mod_hdr_acts); + if (err) + return err; + + modact = mod_hdr_acts->actions + + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); + + /* Firmware has 5bit length field and 0 means 32bits */ + if (mlen == 4) + mlen = 0; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, mfield); + MLX5_SET(set_action_in, modact, offset, moffset * 8); + MLX5_SET(set_action_in, modact, length, mlen * 8); + MLX5_SET(set_action_in, modact, data, data); + mod_hdr_acts->num_actions++; + + return 0; +} + struct mlx5e_hairpin { struct mlx5_hairpin *pair; @@ -210,8 +320,6 @@ struct mlx5e_mod_hdr_entry { int compl_result; }; -#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) - static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); @@ -361,10 +469,10 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mod_hdr_key key; u32 hash_key; - num_actions = parse_attr->num_mod_hdr_actions; + num_actions = parse_attr->mod_hdr_acts.num_actions; actions_size = MLX5_MH_ACT_SZ * num_actions; - key.actions = parse_attr->mod_hdr_actions; + key.actions = parse_attr->mod_hdr_acts.actions; key.num_actions = num_actions; hash_key = hash_mod_hdr_info(&key); @@ -954,7 +1062,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); flow_act.modify_hdr = attr->modify_hdr; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1043,8 +1151,16 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr) { + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct mlx5_flow_handle *rule; + if (flow_flag_test(flow, CT)) { + mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + + return mlx5_tc_ct_flow_offload(flow->priv, flow, spec, attr, + mod_hdr_acts); + } + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); if (IS_ERR(rule)) return rule; @@ -1063,10 +1179,15 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, static void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr) + struct mlx5_esw_flow_attr *attr) { flow_flag_clear(flow, OFFLOADED); + if (flow_flag_test(flow, CT)) { + mlx5_tc_ct_delete_flow(flow->priv, flow, attr); + return; + } + if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); @@ -1224,7 +1345,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1274,6 +1395,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr = flow->esw_attr; int out_index; + mlx5e_put_flow_tunnel_id(flow); + if (flow_flag_test(flow, NOT_READY)) { remove_unready_flow(flow); kvfree(attr->parse_attr); @@ -1662,150 +1785,272 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, } } +static int flow_has_tc_fwd_action(struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_action *flow_action = &rule->action; + const struct flow_action_entry *act; + int i; -static int parse_tunnel_attr(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct flow_cls_offload *f, - struct net_device *filter_dev, u8 *match_level) + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_GOTO: + return true; + default: + continue; + } + } + + return false; +} + +static int +enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, + struct flow_dissector_key_enc_opts *opts, + struct netlink_ext_ack *extack, + bool *dont_care) +{ + struct geneve_opt *opt; + int off = 0; + + *dont_care = true; + + while (opts->len > off) { + opt = (struct geneve_opt *)&opts->data[off]; + + if (!(*dont_care) || opt->opt_class || opt->type || + memchr_inv(opt->opt_data, 0, opt->length * 4)) { + *dont_care = false; + + if (opt->opt_class != U16_MAX || + opt->type != U8_MAX || + memchr_inv(opt->opt_data, 0xFF, + opt->length * 4)) { + NL_SET_ERR_MSG(extack, + "Partial match of tunnel options in chain > 0 isn't supported"); + netdev_warn(priv->netdev, + "Partial match of tunnel options in chain > 0 isn't supported"); + return -EOPNOTSUPP; + } + } + + off += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +#define COPY_DISSECTOR(rule, diss_key, dst)\ +({ \ + struct flow_rule *__rule = (rule);\ + typeof(dst) __dst = dst;\ +\ + memcpy(__dst,\ + skb_flow_dissector_target(__rule->match.dissector,\ + diss_key,\ + __rule->match.key),\ + sizeof(*__dst));\ +}) + +static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct flow_cls_offload *f, + struct net_device *filter_dev) { - struct netlink_ext_ack *extack = f->common.extack; - void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; + struct flow_match_enc_opts enc_opts_match; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tunnel_match_key tunnel_key; + bool enc_opts_is_dont_care = true; + u32 tun_id, enc_opts_id = 0; + struct mlx5_eswitch *esw; + u32 value, mask; int err; - err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, - headers_c, headers_v, match_level); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "failed to parse tunnel attributes"); + esw = priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + memset(&tunnel_key, 0, sizeof(tunnel_key)); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, + &tunnel_key.enc_control); + if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + &tunnel_key.enc_ipv4); + else + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + &tunnel_key.enc_ipv6); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, + &tunnel_key.enc_tp); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, + &tunnel_key.enc_key_id); + tunnel_key.filter_ifindex = filter_dev->ifindex; + + err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id); + if (err) return err; - } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_match_control match; - u16 addr_type; + flow_rule_match_enc_opts(rule, &enc_opts_match); + err = enc_opts_is_dont_care_or_full_match(priv, + enc_opts_match.mask, + extack, + &enc_opts_is_dont_care); + if (err) + goto err_enc_opts; - flow_rule_match_enc_control(rule, &match); - addr_type = match.key->addr_type; + if (!enc_opts_is_dont_care) { + err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_match.key, &enc_opts_id); + if (err) + goto err_enc_opts; + } - /* For tunnel addr_type used same key id`s as for non-tunnel */ - if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_match_ipv4_addrs match; + value = tun_id << ENC_OPTS_BITS | enc_opts_id; + mask = enc_opts_id ? TUNNEL_ID_MASK : + (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK); - flow_rule_match_enc_ipv4_addrs(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.key->src)); + if (attr->chain) { + mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec, + TUNNEL_TO_REG, value, mask); + } else { + mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + err = mlx5e_tc_match_to_reg_set(priv->mdev, + mod_hdr_acts, + TUNNEL_TO_REG, value); + if (err) + goto err_set; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.key->dst)); - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IP); - } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_match_ipv6_addrs match; - - flow_rule_match_enc_ipv6_addrs(rule, &match); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IPV6); - } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { - struct flow_match_ip match; + flow->tunnel_id = value; + return 0; - flow_rule_match_enc_ip(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, - match.mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, - match.key->tos & 0x3); +err_set: + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +err_enc_opts: + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + return err; +} - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, - match.mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, - match.key->tos >> 2); +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) +{ + u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK; + u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_eswitch *esw; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, - match.mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, - match.key->ttl); + esw = flow->priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (tun_id) + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +} - if (match.mask->ttl && - !MLX5_CAP_ESW_FLOWTABLE_FDB - (priv->mdev, - ft_field_support.outer_ipv4_ttl)) { +u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow) +{ + return flow->tunnel_id; +} + +static int parse_tunnel_attr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct net_device *filter_dev, + u8 *match_level, + bool *match_inner) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct netlink_ext_ack *extack = f->common.extack; + bool needs_mapping, sets_mapping; + int err; + + if (!mlx5e_is_eswitch_flow(flow)) + return -EOPNOTSUPP; + + needs_mapping = !!flow->esw_attr->chain; + sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f); + *match_inner = !needs_mapping; + + if ((needs_mapping || sets_mapping) && + !mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + NL_SET_ERR_MSG(extack, + "Chains on tunnel devices isn't supported without register loopback support"); + netdev_warn(priv->netdev, + "Chains on tunnel devices isn't supported without register loopback support"); + return -EOPNOTSUPP; + } + + if (!flow->esw_attr->chain) { + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, + match_level); + if (err) { NL_SET_ERR_MSG_MOD(extack, - "Matching on TTL is not supported"); - return -EOPNOTSUPP; + "Failed to parse tunnel attributes"); + netdev_warn(priv->netdev, + "Failed to parse tunnel attributes"); + return err; } + flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; } - /* Enforce DMAC when offloading incoming tunneled flows. - * Flow counters require a match on the DMAC. - */ - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), priv->netdev->dev_addr); + if (!needs_mapping && !sets_mapping) + return 0; - /* let software handle IP fragments */ - MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev); +} - return 0; +static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + inner_headers); } -static void *get_match_headers_criteria(u32 flags, - struct mlx5_flow_spec *spec) +static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec) { - return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? - MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - inner_headers) : - MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + inner_headers); +} + +static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); +} + +static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); } static void *get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) { return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? - MLX5_ADDR_OF(fte_match_param, spec->match_value, - inner_headers) : - MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); + get_match_inner_headers_value(spec) : + get_match_outer_headers_value(spec); +} + +static void *get_match_headers_criteria(u32 flags, + struct mlx5_flow_spec *spec) +{ + return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? + get_match_inner_headers_criteria(spec) : + get_match_outer_headers_criteria(spec); } static int mlx5e_flower_parse_meta(struct net_device *filter_dev, @@ -1843,6 +2088,7 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, } static int __parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct net_device *filter_dev, @@ -1883,6 +2129,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_TCP) | BIT(FLOW_DISSECTOR_KEY_IP) | + BIT(FLOW_DISSECTOR_KEY_CT) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); @@ -1892,18 +2139,22 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } if (mlx5e_get_tc_tun(filter_dev)) { - if (parse_tunnel_attr(priv, spec, f, filter_dev, - outer_match_level)) - return -EOPNOTSUPP; + bool match_inner = false; - /* At this point, header pointers should point to the inner - * headers, outer header were already set by parse_tunnel_attr - */ - match_level = inner_match_level; - headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP, - spec); - headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP, - spec); + err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, + outer_match_level, &match_inner); + if (err) + return err; + + if (match_inner) { + /* header pointers should point to the inner headers + * if the packet was decapsulated already. + * outer headers are set by parse_tunnel_attr. + */ + match_level = inner_match_level; + headers_c = get_match_inner_headers_criteria(spec); + headers_v = get_match_inner_headers_value(spec); + } } err = mlx5e_flower_parse_meta(filter_dev, f); @@ -2220,8 +2471,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, inner_match_level = MLX5_MATCH_NONE; outer_match_level = MLX5_MATCH_NONE; - err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level, - &outer_match_level); + err = __parse_cls_flower(priv, flow, spec, f, filter_dev, + &inner_match_level, &outer_match_level); non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? outer_match_level : inner_match_level; @@ -2381,25 +2632,26 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), }; -/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, attr->num_mod_hdr_actions - * says how many HW actions were actually parsed. - */ -static int offload_pedit_fields(struct pedit_headers_action *hdrs, +static int offload_pedit_fields(struct mlx5e_priv *priv, + int namespace, + struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, u32 *action_flags, struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - int i, action_size, nactions, max_actions, first, last, next_z; + int i, action_size, first, last, next_z; void *headers_c, *headers_v, *action, *vals_p; u32 *s_masks_p, *a_masks_p, s_mask, a_mask; + struct mlx5e_tc_mod_hdr_acts *mod_acts; struct mlx5_fields *f; unsigned long mask; __be32 mask_be32; __be16 mask_be16; + int err; u8 cmd; + mod_acts = &parse_attr->mod_hdr_acts; headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec); headers_v = get_match_headers_value(*action_flags, &parse_attr->spec); @@ -2409,11 +2661,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, add_vals = &hdrs[1].vals; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - action = parse_attr->mod_hdr_actions + - parse_attr->num_mod_hdr_actions * action_size; - - max_actions = parse_attr->max_mod_hdr_actions; - nactions = parse_attr->num_mod_hdr_actions; for (i = 0; i < ARRAY_SIZE(fields); i++) { bool skip; @@ -2439,13 +2686,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return -EOPNOTSUPP; } - if (nactions == max_actions) { - NL_SET_ERR_MSG_MOD(extack, - "too many pedit actions, can't offload"); - printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions); - return -EOPNOTSUPP; - } - skip = false; if (s_mask) { void *match_mask = headers_c + f->match_offset; @@ -2492,6 +2732,18 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return -EOPNOTSUPP; } + err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "too many pedit actions, can't offload"); + mlx5_core_warn(priv->mdev, + "mlx5: parsed %d pedit actions, can't do more\n", + mod_acts->num_actions); + return err; + } + + action = mod_acts->actions + + (mod_acts->num_actions * action_size); MLX5_SET(set_action_in, action, action_type, cmd); MLX5_SET(set_action_in, action, field, f->field); @@ -2514,11 +2766,9 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, else if (f->field_bsize == 8) MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); - action += action_size; - nactions++; + ++mod_acts->num_actions; } - parse_attr->num_mod_hdr_actions = nactions; return 0; } @@ -2531,29 +2781,48 @@ static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev, return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); } -static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, - struct pedit_headers_action *hdrs, - int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr) +int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, + int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) { - int nkeys, action_size, max_actions; + int action_size, new_num_actions, max_hw_actions; + size_t new_sz, old_sz; + void *ret; - nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits + - hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits; - action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) + return 0; - max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace); - /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */ - max_actions = min(max_actions, nkeys * 16); + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL); - if (!parse_attr->mod_hdr_actions) + max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev, + namespace); + new_num_actions = min(max_hw_actions, + mod_hdr_acts->actions ? + mod_hdr_acts->max_actions * 2 : 1); + if (mod_hdr_acts->max_actions == new_num_actions) + return -ENOSPC; + + new_sz = action_size * new_num_actions; + old_sz = mod_hdr_acts->max_actions * action_size; + ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); + if (!ret) return -ENOMEM; - parse_attr->max_mod_hdr_actions = max_actions; + memset(ret + old_sz, 0, new_sz - old_sz); + mod_hdr_acts->actions = ret; + mod_hdr_acts->max_actions = new_num_actions; + return 0; } +void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + kfree(mod_hdr_acts->actions); + mod_hdr_acts->actions = NULL; + mod_hdr_acts->num_actions = 0; + mod_hdr_acts->max_actions = 0; +} + static const struct pedit_headers zero_masks = {}; static int parse_tc_pedit_action(struct mlx5e_priv *priv, @@ -2605,13 +2874,8 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, int err; u8 cmd; - if (!parse_attr->mod_hdr_actions) { - err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); - if (err) - goto out_err; - } - - err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack); + err = offload_pedit_fields(priv, namespace, hdrs, parse_attr, + action_flags, extack); if (err < 0) goto out_dealloc_parsed_actions; @@ -2631,8 +2895,7 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, return 0; out_dealloc_parsed_actions: - kfree(parse_attr->mod_hdr_actions); -out_err: + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); return err; } @@ -2677,7 +2940,9 @@ struct ipv6_hoplimit_word { __u8 hop_limit; }; -static bool is_action_keys_supported(const struct flow_action_entry *act) +static int is_action_keys_supported(const struct flow_action_entry *act, + bool ct_flow, bool *modify_ip_header, + struct netlink_ext_ack *extack) { u32 mask, offset; u8 htype; @@ -2696,7 +2961,13 @@ static bool is_action_keys_supported(const struct flow_action_entry *act) if (offset != offsetof(struct iphdr, ttl) || ttl_word->protocol || ttl_word->check) { - return true; + *modify_ip_header = true; + } + + if (ct_flow && offset >= offsetof(struct iphdr, saddr)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of ipv4 address with action ct"); + return -EOPNOTSUPP; } } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { struct ipv6_hoplimit_word *hoplimit_word = @@ -2705,15 +2976,27 @@ static bool is_action_keys_supported(const struct flow_action_entry *act) if (offset != offsetof(struct ipv6hdr, payload_len) || hoplimit_word->payload_len || hoplimit_word->nexthdr) { - return true; + *modify_ip_header = true; } + + if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of ipv6 address with action ct"); + return -EOPNOTSUPP; + } + } else if (ct_flow && (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP || + htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of transport header ports with action ct"); + return -EOPNOTSUPP; } - return false; + + return 0; } static bool modify_header_match_supported(struct mlx5_flow_spec *spec, struct flow_action *flow_action, - u32 actions, + u32 actions, bool ct_flow, struct netlink_ext_ack *extack) { const struct flow_action_entry *act; @@ -2721,7 +3004,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, void *headers_v; u16 ethertype; u8 ip_proto; - int i; + int i, err; headers_v = get_match_headers_value(actions, spec); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); @@ -2736,10 +3019,10 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, act->id != FLOW_ACTION_ADD) continue; - if (is_action_keys_supported(act)) { - modify_ip_header = true; - break; - } + err = is_action_keys_supported(act, ct_flow, + &modify_ip_header, extack); + if (err) + return err; } ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); @@ -2761,23 +3044,29 @@ static bool actions_match_supported(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + bool ct_flow; u32 actions; - if (mlx5e_is_eswitch_flow(flow)) + ct_flow = flow_flag_test(flow, CT); + if (mlx5e_is_eswitch_flow(flow)) { actions = flow->esw_attr->action; - else - actions = flow->nic_attr->action; - if (flow_flag_test(flow, EGRESS) && - !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_DROP))) - return false; + if (flow->esw_attr->split_count && ct_flow) { + /* All registers used by ct are cleared when using + * split rules. + */ + NL_SET_ERR_MSG_MOD(extack, + "Can't offload mirroring with action ct"); + return -EOPNOTSUPP; + } + } else { + actions = flow->nic_attr->action; + } if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) return modify_header_match_supported(&parse_attr->spec, flow_action, actions, - extack); + ct_flow, extack); return true; } @@ -2878,8 +3167,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (!flow_action_has_entries(flow_action)) return -EINVAL; - if (!flow_action_hw_stats_types_check(flow_action, extack, - FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT)) + if (!flow_action_hw_stats_check(flow_action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; @@ -2968,9 +3257,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, /* in case all pedit actions are skipped, remove the MOD_HDR * flag. */ - if (parse_attr->num_mod_hdr_actions == 0) { + if (parse_attr->mod_hdr_acts.num_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); } } @@ -3352,6 +3641,46 @@ static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw, return 0; } +static int verify_uplink_forwarding(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct net_device *out_dev, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_rep_priv *rep_priv; + + /* Forwarding non encapsulated traffic between + * uplink ports is allowed only if + * termination_table_raw_traffic cap is set. + * + * Input vport was stored esw_attr->in_rep. + * In LAG case, *priv* is the private data of + * uplink which may be not the input vport. + */ + rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep); + + if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) && + mlx5e_eswitch_uplink_rep(out_dev))) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, + termination_table_raw_traffic)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are both uplink, can't offload forwarding"); + pr_err("devices %s %s are both uplink, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EOPNOTSUPP; + } else if (out_dev != rep_priv->netdev) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not the same uplink, can't offload forwarding"); + pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EOPNOTSUPP; + } + return 0; +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, @@ -3366,15 +3695,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; + bool encap = false, decap = false; + u32 action = attr->action; int err, i, if_count = 0; - bool encap = false; - u32 action = 0; if (!flow_action_has_entries(flow_action)) return -EINVAL; - if (!flow_action_hw_stats_types_check(flow_action, extack, - FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT)) + if (!flow_action_hw_stats_check(flow_action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; flow_action_for_each(i, act, flow_action) { @@ -3449,7 +3778,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); struct net_device *uplink_upper; - struct mlx5e_rep_priv *rep_priv; if (is_duplicated_output_device(priv->netdev, out_dev, @@ -3485,21 +3813,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return err; } - /* Don't allow forwarding between uplink. - * - * Input vport was stored esw_attr->in_rep. - * In LAG case, *priv* is the private data of - * uplink which may be not the input vport. - */ - rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep); - if (mlx5e_eswitch_uplink_rep(rep_priv->netdev) && - mlx5e_eswitch_uplink_rep(out_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "devices are both uplink, can't offload forwarding"); - pr_err("devices %s %s are both uplink, can't offload forwarding\n", - priv->netdev->name, out_dev->name); - return -EOPNOTSUPP; - } + err = verify_uplink_forwarding(priv, flow, out_dev, extack); + if (err) + return err; if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { NL_SET_ERR_MSG_MOD(extack, @@ -3571,7 +3887,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, attr->split_count = attr->out_count; break; case FLOW_ACTION_TUNNEL_DECAP: - action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + decap = true; break; case FLOW_ACTION_GOTO: err = mlx5_validate_goto_chain(esw, flow, act, action, @@ -3582,6 +3898,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = act->chain_index; break; + case FLOW_ACTION_CT: + err = mlx5_tc_ct_parse_action(priv, attr, act, extack); + if (err) + return err; + + flow_flag_set(flow, CT); + break; default: NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); return -EOPNOTSUPP; @@ -3610,9 +3933,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, * flag. we might have set split_count either by pedit or * pop/push. if there is no pop/push either, reset it too. */ - if (parse_attr->num_mod_hdr_actions == 0) { + if (parse_attr->mod_hdr_acts.num_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) attr->split_count = 0; @@ -3624,6 +3947,22 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; if (attr->dest_chain) { + if (decap) { + /* It can be supported if we'll create a mapping for + * the tunnel device only (without tunnel), and set + * this tunnel id with this decap flow. + * + * On restore (miss), we'll just set this saved tunnel + * device. + */ + + NL_SET_ERR_MSG(extack, + "Decap with goto isn't supported"); + netdev_warn(priv->netdev, + "Decap with goto isn't supported"); + return -EOPNOTSUPP; + } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { NL_SET_ERR_MSG_MOD(extack, "Mirroring goto chain rules isn't supported"); @@ -3806,6 +4145,10 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; + err = mlx5_tc_ct_parse_match(priv, &parse_attr->spec, f, extack); + if (err) + goto err_free; + err = mlx5e_tc_add_fdb_flow(priv, flow, extack); complete_all(&flow->init_done); if (err) { @@ -4090,7 +4433,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, goto errout; } - if (mlx5e_is_offloaded_flow(flow)) { + if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) { counter = mlx5e_tc_get_counter(flow); if (!counter) goto errout; @@ -4181,7 +4524,7 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (!flow_action_basic_hw_stats_types_check(flow_action, extack)) + if (!flow_action_basic_hw_stats_check(flow_action, extack)) return -EOPNOTSUPP; flow_action_for_each(i, act, flow_action) { @@ -4205,8 +4548,14 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *ma) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = ma->common.extack; + if (!mlx5_esw_qos_enabled(esw)) { + NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device"); + return -EOPNOTSUPP; + } + if (ma->common.prio != 1) { NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); return -EINVAL; @@ -4353,12 +4702,63 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) int mlx5e_tc_esw_init(struct rhashtable *tc_ht) { - return rhashtable_init(tc_ht, &tc_ht_params); + const size_t sz_enc_opts = sizeof(struct flow_dissector_key_enc_opts); + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *priv; + struct mapping_ctx *mapping; + int err; + + uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + + err = mlx5_tc_ct_init(uplink_priv); + if (err) + goto err_ct; + + mapping = mapping_create(sizeof(struct tunnel_match_key), + TUNNEL_INFO_BITS_MASK, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_tun_mapping; + } + uplink_priv->tunnel_mapping = mapping; + + mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_enc_opts_mapping; + } + uplink_priv->tunnel_enc_opts_mapping = mapping; + + err = rhashtable_init(tc_ht, &tc_ht_params); + if (err) + goto err_ht_init; + + return err; + +err_ht_init: + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); +err_enc_opts_mapping: + mapping_destroy(uplink_priv->tunnel_mapping); +err_tun_mapping: + mlx5_tc_ct_clean(uplink_priv); +err_ct: + netdev_warn(priv->netdev, + "Failed to initialize tc (eswitch), err: %d", err); + return err; } void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) { + struct mlx5_rep_uplink_priv *uplink_priv; + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); + + uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); + mapping_destroy(uplink_priv->tunnel_mapping); + + mlx5_tc_ct_clean(uplink_priv); } int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) @@ -4390,3 +4790,147 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work) } mutex_unlock(&rpriv->unready_flows_lock); } + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct flow_dissector_key_enc_opts enc_opts = {}; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct metadata_dst *tun_dst; + struct tunnel_match_key key; + u32 tun_id, enc_opts_id; + struct net_device *dev; + int err; + + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; + tun_id = tunnel_id >> ENC_OPTS_BITS; + + if (!tun_id) + return true; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); + if (err) { + WARN_ON_ONCE(true); + netdev_dbg(priv->netdev, + "Couldn't find tunnel for tun_id: %d, err: %d\n", + tun_id, err); + return false; + } + + if (enc_opts_id) { + err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id, &enc_opts); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", + enc_opts_id, err); + return false; + } + } + + tun_dst = tun_rx_dst(enc_opts.len); + if (!tun_dst) { + WARN_ON_ONCE(true); + return false; + } + + ip_tunnel_key_init(&tun_dst->u.tun_info.key, + key.enc_ipv4.src, key.enc_ipv4.dst, + key.enc_ip.tos, key.enc_ip.ttl, + 0, /* label */ + key.enc_tp.src, key.enc_tp.dst, + key32_to_tunnel_id(key.enc_key_id.keyid), + TUNNEL_KEY); + + if (enc_opts.len) + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.data, + enc_opts.len, enc_opts.dst_opt_type); + + skb_dst_set(skb, (struct dst_entry *)tun_dst); + dev = dev_get_by_index(&init_net, key.filter_ifindex); + if (!dev) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel device with ifindex: %d\n", + key.filter_ifindex); + return false; + } + + /* Set tun_dev so we do dev_put() after datapath */ + tc_priv->tun_dev = dev; + + skb->dev = dev; + + return true; +} +#endif /* CONFIG_NET_TC_SKB_EXT */ + +bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, + struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tc_skb_ext *tc_skb_ext; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + int tunnel_moffset; + int err; + + reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); + if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) + reg_c0 = 0; + reg_c1 = be32_to_cpu(cqe->imm_inval_pkey); + + if (!reg_c0) + return true; + + priv = netdev_priv(skb->dev); + esw = priv->mdev->priv.eswitch; + + err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find chain for chain tag: %d, err: %d\n", + reg_c0, err); + return false; + } + + if (chain) { + tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + if (!tc_skb_ext) { + WARN_ON(1); + return false; + } + + tc_skb_ext->chain = chain; + + tuple_id = reg_c1 & TUPLE_ID_MAX; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id)) + return false; + } + + tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset; + tunnel_id = reg_c1 >> (8 * tunnel_moffset); + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); +#endif /* CONFIG_NET_TC_SKB_EXT */ + + return true; +} + +void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) +{ + if (tc_priv->tun_dev) + dev_put(tc_priv->tun_dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 262cdb7b69b1..abdcfa4c4e0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -91,9 +91,63 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); +enum mlx5e_tc_attr_to_reg { + CHAIN_TO_REG, + TUNNEL_TO_REG, + CTSTATE_TO_REG, + ZONE_TO_REG, + MARK_TO_REG, + LABELS_TO_REG, + FTEID_TO_REG, + TUPLEID_TO_REG, +}; + +struct mlx5e_tc_attr_to_reg_mapping { + int mfield; /* rewrite field */ + int moffset; /* offset of mfield */ + int mlen; /* bytes to rewrite/match */ + + int soffset; /* offset of spec for match */ +}; + +extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[]; + bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, struct net_device *out_dev); +struct mlx5e_tc_update_priv { + struct net_device *tun_dev; +}; + +bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv); + +void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv); + +struct mlx5e_tc_mod_hdr_acts { + int num_actions; + int max_actions; + void *actions; +}; + +int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + u32 data); + +void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 data, + u32 mask); + +int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, + int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); + +struct mlx5e_tc_flow; +u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 25640864c375..8fc351240f4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1334,7 +1334,6 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, goto out; } - memset(spec, 0, sizeof(*spec)); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach drop flow counter */ @@ -1346,7 +1345,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, dest_num++; } vport->ingress.legacy.drop_rule = - mlx5_add_flow_rules(vport->ingress.acl, spec, + mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, dst, dest_num); if (IS_ERR(vport->ingress.legacy.drop_rule)) { err = PTR_ERR(vport->ingress.legacy.drop_rule); @@ -1409,7 +1408,6 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, struct mlx5_flow_destination drop_ctr_dst = {0}; struct mlx5_flow_destination *dst = NULL; struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_spec *spec; int dest_num = 0; int err = 0; @@ -1438,11 +1436,6 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, if (err) return err; - /* Drop others rule (star rule) */ - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - goto out; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach egress drop flow counter */ @@ -1454,7 +1447,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, dest_num++; } vport->egress.legacy.drop_rule = - mlx5_add_flow_rules(vport->egress.acl, spec, + mlx5_add_flow_rules(vport->egress.acl, NULL, &flow_act, dst, dest_num); if (IS_ERR(vport->egress.legacy.drop_rule)) { err = PTR_ERR(vport->egress.legacy.drop_rule); @@ -1463,8 +1456,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, vport->vport, err); vport->egress.legacy.drop_rule = NULL; } -out: - kvfree(spec); + return err; } @@ -1670,34 +1662,6 @@ static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) ((u8 *)node_guid)[0] = mac[5]; } -static void esw_apply_vport_conf(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - u16 vport_num = vport->vport; - int flags; - - if (mlx5_esw_is_manager_vport(esw, vport_num)) - return; - - mlx5_modify_vport_admin_state(esw->dev, - MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, 1, - vport->info.link_state); - - /* Host PF has its own mac/guid. */ - if (vport_num) { - mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, - vport->info.mac); - mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, - vport->info.node_guid); - } - - flags = (vport->info.vlan || vport->info.qos) ? - SET_VLAN_STRIP | SET_VLAN_INSERT : 0; - modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, - flags); -} - static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1707,8 +1671,7 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, if (mlx5_esw_is_manager_vport(esw, vport->vport)) return 0; - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && - MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { + if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { vport->ingress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); if (IS_ERR(vport->ingress.legacy.drop_counter)) { esw_warn(esw->dev, @@ -1722,8 +1685,7 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, if (ret) goto ingress_err; - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && - MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { + if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { vport->egress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); if (IS_ERR(vport->egress.legacy.drop_counter)) { esw_warn(esw->dev, @@ -1784,29 +1746,75 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, esw_vport_destroy_offloads_acl_tables(esw, vport); } -static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - enum mlx5_eswitch_vport_event enabled_events) +static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + u16 vport_num = vport->vport; + int flags; + int err; + + err = esw_vport_setup_acl(esw, vport); + if (err) + return err; + + /* Attach vport to the eswitch rate limiter */ + esw_vport_enable_qos(esw, vport, vport->info.max_rate, vport->qos.bw_share); + + if (mlx5_esw_is_manager_vport(esw, vport_num)) + return 0; + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, 1, + vport->info.link_state); + + /* Host PF has its own mac/guid. */ + if (vport_num) { + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, + vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, + vport->info.node_guid); + } + + flags = (vport->info.vlan || vport->info.qos) ? + SET_VLAN_STRIP | SET_VLAN_INSERT : 0; + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, + vport->info.qos, flags); + + return 0; +} + +/* Don't cleanup vport->info, it's needed to restore vport configuration */ +static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { u16 vport_num = vport->vport; + + if (!mlx5_esw_is_manager_vport(esw, vport_num)) + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); + + esw_vport_disable_qos(esw, vport); + esw_vport_cleanup_acl(esw, vport); +} + +static int esw_enable_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; int ret; + vport = mlx5_eswitch_get_vport(esw, vport_num); + mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); - /* Restore old vport configuration */ - esw_apply_vport_conf(esw, vport); - - ret = esw_vport_setup_acl(esw, vport); + ret = esw_vport_setup(esw, vport); if (ret) goto done; - /* Attach vport to the eswitch rate limiter */ - if (esw_vport_enable_qos(esw, vport, vport->info.max_rate, - vport->qos.bw_share)) - esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); - /* Sync with current vport context */ vport->enabled_events = enabled_events; vport->enabled = true; @@ -1827,10 +1835,11 @@ done: return ret; } -static void esw_disable_vport(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static void esw_disable_vport(struct mlx5_eswitch *esw, u16 vport_num) { - u16 vport_num = vport->vport; + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); mutex_lock(&esw->state_lock); if (!vport->enabled) @@ -1848,16 +1857,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; - esw_vport_disable_qos(esw, vport); - - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && - esw->mode == MLX5_ESWITCH_LEGACY) - mlx5_modify_vport_admin_state(esw->dev, - MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, 1, - MLX5_VPORT_ADMIN_STATE_DOWN); - - esw_vport_cleanup_acl(esw, vport); + esw_vport_cleanup(esw, vport); esw->enabled_vports--; done: @@ -1945,6 +1945,59 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) +int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + int err; + + err = esw_enable_vport(esw, vport_num, enabled_events); + if (err) + return err; + + err = esw_offloads_load_rep(esw, vport_num); + if (err) + goto err_rep; + + return err; + +err_rep: + esw_disable_vport(esw, vport_num); + return err; +} + +void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + esw_offloads_unload_rep(esw, vport_num); + esw_disable_vport(esw, vport_num); +} + +void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) +{ + int i; + + mlx5_esw_for_each_vf_vport_num_reverse(esw, i, num_vfs) + mlx5_eswitch_unload_vport(esw, i); +} + +int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, + enum mlx5_eswitch_vport_event enabled_events) +{ + int err; + int i; + + mlx5_esw_for_each_vf_vport_num(esw, i, num_vfs) { + err = mlx5_eswitch_load_vport(esw, i, enabled_events); + if (err) + goto vf_err; + } + + return 0; + +vf_err: + mlx5_eswitch_unload_vf_vports(esw, i - 1); + return err; +} + /* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs * whichever are present on the eswitch. */ @@ -1952,46 +2005,33 @@ int mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enum mlx5_eswitch_vport_event enabled_events) { - struct mlx5_vport *vport; - int num_vfs; int ret; - int i; /* Enable PF vport */ - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - ret = esw_enable_vport(esw, vport, enabled_events); + ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_PF, enabled_events); if (ret) return ret; /* Enable ECPF vport */ if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - ret = esw_enable_vport(esw, vport, enabled_events); + ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; } /* Enable VF vports */ - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { - ret = esw_enable_vport(esw, vport, enabled_events); - if (ret) - goto vf_err; - } + ret = mlx5_eswitch_load_vf_vports(esw, esw->esw_funcs.num_vfs, + enabled_events); + if (ret) + goto vf_err; return 0; vf_err: - num_vfs = i - 1; - mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, num_vfs) - esw_disable_vport(esw, vport); - - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - esw_disable_vport(esw, vport); - } + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); ecpf_err: - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - esw_disable_vport(esw, vport); + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); return ret; } @@ -2000,11 +2040,12 @@ ecpf_err: */ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { - struct mlx5_vport *vport; - int i; + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); - mlx5_esw_for_all_vports_reverse(esw, i, vport) - esw_disable_vport(esw, vport); + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); } static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw) @@ -2198,6 +2239,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); esw_offloads_cleanup_reps(esw); + mutex_destroy(&esw->state_lock); mutex_destroy(&esw->offloads.mod_hdr.lock); mutex_destroy(&esw->offloads.encap_tbl_lock); kfree(esw->vports); @@ -2432,12 +2474,11 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, } /* Star rule to forward all traffic to uplink vport */ - memset(spec, 0, sizeof(*spec)); memset(&dest, 0, sizeof(dest)); dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport.num = MLX5_VPORT_UPLINK; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -2622,9 +2663,13 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, u64 bytes = 0; int err = 0; - if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY) + if (esw->mode != MLX5_ESWITCH_LEGACY) return 0; + mutex_lock(&esw->state_lock); + if (!vport->enabled) + goto unlock; + if (vport->egress.legacy.drop_counter) mlx5_fc_query(dev, vport->egress.legacy.drop_counter, &stats->rx_dropped, &bytes); @@ -2635,20 +2680,22 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) - return 0; + goto unlock; err = mlx5_query_vport_down_stats(dev, vport->vport, 1, &rx_discard_vport_down, &tx_discard_vport_down); if (err) - return err; + goto unlock; if (MLX5_CAP_GEN(dev, receive_discard_vport_down)) stats->rx_dropped += rx_discard_vport_down; if (MLX5_CAP_GEN(dev, transmit_discard_vport_down)) stats->tx_dropped += tx_discard_vport_down; - return 0; +unlock: + mutex_unlock(&esw->state_lock); + return err; } int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 99073342b500..95532b258c2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -42,6 +42,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "lib/mpfs.h" +#include "en/tc_ct.h" #define FDB_TC_MAX_CHAIN 3 #define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1) @@ -197,6 +198,10 @@ struct mlx5_eswitch_fdb { }; struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads_restore; + struct mlx5_flow_group *restore_group; + struct mlx5_modify_hdr *restore_copy_hdr_id; + struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; @@ -232,6 +237,7 @@ struct mlx5_esw_functions { enum { MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), + MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED = BIT(1), }; struct mlx5_eswitch { @@ -326,6 +332,7 @@ struct mlx5_termtbl_handle; bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec); @@ -386,6 +393,8 @@ enum { enum { MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), + MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), + MLX5_ESW_ATTR_FLAG_HAIRPIN = BIT(3), }; struct mlx5_esw_flow_attr { @@ -416,6 +425,9 @@ struct mlx5_esw_flow_attr { u16 prio; u32 dest_chain; u32 flags; + struct mlx5_flow_table *fdb; + struct mlx5_flow_table *dest_ft; + struct mlx5_ct_attr ct_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; }; @@ -443,6 +455,11 @@ int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw, struct mlx5_vport *vport, u16 vlan_id, u32 flow_action); +static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw) +{ + return esw->qos.enabled; +} + static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) { @@ -636,6 +653,22 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw); void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw); +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag); +u32 +esw_get_max_restore_tag(struct mlx5_eswitch *esw); + +int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num); +void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -651,6 +684,12 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} +static inline struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + return ERR_PTR(-EOPNOTSUPP); +} + #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1a909d47aee2..0b4b43ebae9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -260,7 +260,8 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, attr->in_rep->vport)); misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); @@ -299,6 +300,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, bool split = !!(attr->split_count); struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; + bool hairpin = false; int j, i = 0; if (esw->mode != MLX5_ESWITCH_OFFLOADS) @@ -323,7 +325,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { struct mlx5_flow_table *ft; - if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { + if (attr->dest_ft) { + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = attr->dest_ft; + i++; + } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw); @@ -377,31 +384,41 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (split) { fdb = esw_vport_tbl_get(esw, attr); } else { - fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, - 0); - mlx5_eswitch_set_rule_source_port(esw, spec, attr); + if (attr->chain || attr->prio) + fdb = mlx5_esw_chains_get_table(esw, attr->chain, + attr->prio, 0); + else + fdb = attr->fdb; + + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT)) + mlx5_eswitch_set_rule_source_port(esw, spec, attr); } if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; } - if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec)) + if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) { rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, &flow_act, dest, i); - else + hairpin = true; + } else { rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); + } if (IS_ERR(rule)) goto err_add_rule; else atomic64_inc(&esw->offloads.num_flows); + if (hairpin) + attr->flags |= MLX5_ESW_ATTR_FLAG_HAIRPIN; + return rule; err_add_rule: if (split) esw_vport_tbl_put(esw, attr); - else + else if (attr->chain || attr->prio) mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); err_esw_get: if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain) @@ -484,10 +501,12 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_del_flow_rules(rule); - /* unref the term table */ - for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { - if (attr->dests[i].termtbl) - mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + if (attr->flags & MLX5_ESW_ATTR_FLAG_HAIRPIN) { + /* unref the term table */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + } } atomic64_dec(&esw->offloads.num_flows); @@ -498,7 +517,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, } else { if (split) esw_vport_tbl_put(esw, attr); - else + else if (attr->chain || attr->prio) mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); if (attr->dest_chain) @@ -762,14 +781,21 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } +static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw) +{ + return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_1; +} + static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) { u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; - u8 fdb_to_vport_reg_c_id; + u8 curr, wanted; int err; - if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + if (!mlx5_eswitch_reg_c1_loopback_supported(esw) && + !mlx5_eswitch_vport_match_metadata_enabled(esw)) return 0; err = mlx5_eswitch_query_esw_vport_context(esw->dev, 0, false, @@ -777,22 +803,33 @@ static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) if (err) return err; - fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.fdb_to_vport_reg_c_id); + curr = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + wanted = MLX5_FDB_TO_VPORT_REG_C_0; + if (mlx5_eswitch_reg_c1_loopback_supported(esw)) + wanted |= MLX5_FDB_TO_VPORT_REG_C_1; if (enable) - fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + curr |= wanted; else - fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + curr &= ~wanted; MLX5_SET(modify_esw_vport_context_in, in, - esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + esw_vport_context.fdb_to_vport_reg_c_id, curr); MLX5_SET(modify_esw_vport_context_in, in, field_select.fdb_to_vport_reg_c_id, 1); - return mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, - in, sizeof(in)); + err = mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, in, + sizeof(in)); + if (!err) { + if (enable && (curr & MLX5_FDB_TO_VPORT_REG_C_1)) + esw->flags |= MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED; + else + esw->flags &= ~MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED; + } + + return err; } static void peer_miss_rules_setup(struct mlx5_eswitch *esw, @@ -805,7 +842,8 @@ static void peer_miss_rules_setup(struct mlx5_eswitch *esw, if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; } else { @@ -1020,6 +1058,56 @@ out: return err; } +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_flow_table *ft = esw->offloads.ft_offloads_restore; + struct mlx5_flow_context *flow_context; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_destination dest; + struct mlx5_flow_spec *spec; + void *misc; + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_CHAIN_TAG_METADATA_MASK); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_hdr = esw->offloads.restore_copy_hdr_id; + + flow_context = &spec->flow_context; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = tag; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->offloads.ft_offloads; + + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kfree(spec); + + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, + "Failed to create restore rule for tag: %d, err(%d)\n", + tag, (int)PTR_ERR(flow_rule)); + + return flow_rule; +} + +u32 +esw_get_max_restore_tag(struct mlx5_eswitch *esw) +{ + return ESW_CHAIN_TAG_METADATA_MASK; +} + #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 @@ -1035,8 +1123,9 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_0); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); } else { MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -1241,6 +1330,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) } ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS; + ft_attr.prio = 1; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { @@ -1318,7 +1408,8 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; } else { @@ -1344,6 +1435,7 @@ out: return flow_rule; } + static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode) { u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; @@ -1379,6 +1471,102 @@ query_vports: out: *mode = mlx5_mode; return 0; +} + +static void esw_destroy_restore_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + mlx5_modify_header_dealloc(esw->dev, offloads->restore_copy_hdr_id); + mlx5_destroy_flow_group(offloads->restore_group); + mlx5_destroy_flow_table(offloads->ft_offloads_restore); +} + +static int esw_create_restore_table(struct mlx5_eswitch *esw) +{ + u8 modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_modify_hdr *mod_hdr; + void *match_criteria, *misc; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto out_free; + } + + ft_attr.max_fte = 1 << ESW_CHAIN_TAG_METADATA_BITS; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + esw_warn(esw->dev, "Failed to create restore table, err %d\n", + err); + goto out_free; + } + + memset(flow_group_in, 0, inlen); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_CHAIN_TAG_METADATA_MASK); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft_attr.max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create restore flow group, err: %d\n", + err); + goto err_group; + } + + MLX5_SET(copy_action_in, modact, action_type, MLX5_ACTION_TYPE_COPY); + MLX5_SET(copy_action_in, modact, src_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_1); + MLX5_SET(copy_action_in, modact, dst_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_B); + mod_hdr = mlx5_modify_header_alloc(esw->dev, + MLX5_FLOW_NAMESPACE_KERNEL, 1, + modact); + if (IS_ERR(mod_hdr)) { + esw_warn(dev, "Failed to create restore mod header, err: %d\n", + err); + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + + esw->offloads.ft_offloads_restore = ft; + esw->offloads.restore_group = g; + esw->offloads.restore_copy_hdr_id = mod_hdr; + + return 0; + +err_mod_hdr: + mlx5_destroy_flow_group(g); +err_group: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + + return err; } static int esw_offloads_start(struct mlx5_eswitch *esw, @@ -1454,187 +1642,66 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type]->unload(rep); } -static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - struct mlx5_eswitch_rep *rep; - - if (mlx5_ecpf_vport_exists(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); - __esw_offloads_unload_rep(esw, rep, rep_type); - } - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); - __esw_offloads_unload_rep(esw, rep, rep_type); - } - - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - __esw_offloads_unload_rep(esw, rep, rep_type); -} - -static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; int i; - mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports) + mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, esw->esw_funcs.num_vfs) __esw_offloads_unload_rep(esw, rep, rep_type); -} - -static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) -{ - u8 rep_type = NUM_REP_TYPES; - - while (rep_type-- > 0) - __unload_reps_vf_vport(esw, nvports, rep_type); -} - -static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); - - /* Special vports must be the last to unload. */ - __unload_reps_special_vport(esw, rep_type); -} - -static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw) -{ - u8 rep_type = NUM_REP_TYPES; - - while (rep_type-- > 0) - __unload_reps_all_vport(esw, rep_type); -} - -static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep, u8 rep_type) -{ - int err = 0; - - if (atomic_cmpxchg(&rep->rep_data[rep_type].state, - REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { - err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); - if (err) - atomic_set(&rep->rep_data[rep_type].state, - REP_REGISTERED); - } - - return err; -} - -static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - struct mlx5_eswitch_rep *rep; - int err; - - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - return err; - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - goto err_pf; - } if (mlx5_ecpf_vport_exists(esw->dev)) { rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - goto err_ecpf; + __esw_offloads_unload_rep(esw, rep, rep_type); } - return 0; - -err_ecpf: if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); __esw_offloads_unload_rep(esw, rep, rep_type); } -err_pf: rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); __esw_offloads_unload_rep(esw, rep, rep_type); - return err; } -static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; - int err, i; - - mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) { - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - goto err_vf; - } - - return 0; - -err_vf: - __unload_reps_vf_vport(esw, --i, rep_type); - return err; -} - -static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ + int rep_type; int err; - /* Special vports must be loaded first, uplink rep creates mdev resource. */ - err = __load_reps_special_vport(esw, rep_type); - if (err) - return err; + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; - err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); - if (err) - goto err_vfs; + rep = mlx5_eswitch_get_rep(esw, vport_num); + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, + REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); + if (err) + goto err_reps; + } return 0; -err_vfs: - __unload_reps_special_vport(esw, rep_type); - return err; -} - -static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) -{ - u8 rep_type = 0; - int err; - - for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_vf_vport(esw, nvports, rep_type); - if (err) - goto err_reps; - } - - return err; - err_reps: - while (rep_type-- > 0) - __unload_reps_vf_vport(esw, nvports, rep_type); + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); + for (--rep_type; rep_type >= 0; rep_type--) + __esw_offloads_unload_rep(esw, rep, rep_type); return err; } -static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw) +void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) { - u8 rep_type = 0; - int err; - - for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_all_vport(esw, rep_type); - if (err) - goto err_reps; - } + struct mlx5_eswitch_rep *rep; + int rep_type; - return err; + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; -err_reps: - while (rep_type-- > 0) - __unload_reps_all_vport(esw, rep_type); - return err; + rep = mlx5_eswitch_get_rep(esw, vport_num); + for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--) + __esw_offloads_unload_rep(esw, rep, rep_type); } #define ESW_OFFLOADS_DEVCOM_PAIR (0) @@ -1822,14 +1889,21 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; - static const struct mlx5_flow_spec spec = {}; struct mlx5_flow_act flow_act = {}; int err = 0; + u32 key; + + key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport); + key >>= ESW_SOURCE_PORT_METADATA_OFFSET; MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); - MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); - MLX5_SET(set_action_in, action, data, - mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, key); + MLX5_SET(set_action_in, action, offset, + ESW_SOURCE_PORT_METADATA_OFFSET); + MLX5_SET(set_action_in, action, length, + ESW_SOURCE_PORT_METADATA_BITS); vport->ingress.offloads.modify_metadata = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, @@ -1846,7 +1920,7 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; vport->ingress.offloads.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, - &spec, &flow_act, NULL, 0); + NULL, &flow_act, NULL, 0); if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) { err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule); esw_warn(esw->dev, @@ -2144,13 +2218,17 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) if (err) return err; - err = esw_create_offloads_fdb_tables(esw, total_vports); + err = esw_create_offloads_table(esw, total_vports); if (err) - goto create_fdb_err; + goto create_offloads_err; - err = esw_create_offloads_table(esw, total_vports); + err = esw_create_restore_table(esw); if (err) - goto create_ft_err; + goto create_restore_err; + + err = esw_create_offloads_fdb_tables(esw, total_vports); + if (err) + goto create_fdb_err; err = esw_create_vport_rx_group(esw, total_vports); if (err) @@ -2162,12 +2240,12 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) return 0; create_fg_err: - esw_destroy_offloads_table(esw); - -create_ft_err: esw_destroy_offloads_fdb_tables(esw); - create_fdb_err: + esw_destroy_restore_table(esw); +create_restore_err: + esw_destroy_offloads_table(esw); +create_offloads_err: esw_destroy_uplink_offloads_acl_tables(esw); return err; @@ -2177,8 +2255,9 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) { mutex_destroy(&esw->fdb_table.offloads.vports.lock); esw_destroy_vport_rx_group(esw); - esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); + esw_destroy_restore_table(esw); + esw_destroy_offloads_table(esw); esw_destroy_uplink_offloads_acl_tables(esw); } @@ -2198,11 +2277,12 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) /* Number of VFs can only change from "0 to x" or "x to 0". */ if (esw->esw_funcs.num_vfs > 0) { - esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs); + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); } else { int err; - err = esw_offloads_load_vf_reps(esw, new_num_vfs); + err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs, + MLX5_VPORT_UC_ADDR_CHANGE); if (err) return; } @@ -2260,6 +2340,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) else esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; + mutex_init(&esw->offloads.termtbl_mutex); mlx5_rdma_enable_roce(esw->dev); err = esw_offloads_steering_init(esw); if (err) @@ -2273,27 +2354,28 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; - err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); + /* Uplink vport rep must load first. */ + err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK); if (err) - goto err_vports; + goto err_uplink; - err = esw_offloads_load_all_reps(esw); + err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); if (err) - goto err_reps; + goto err_vports; esw_offloads_devcom_init(esw); - mutex_init(&esw->offloads.termtbl_mutex); return 0; -err_reps: - mlx5_eswitch_disable_pf_vf_vports(esw); err_vports: + esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); +err_uplink: esw_set_passing_vport_metadata(esw, false); err_vport_metadata: esw_offloads_steering_cleanup(esw); err_steering_init: mlx5_rdma_disable_roce(esw->dev); + mutex_destroy(&esw->offloads.termtbl_mutex); return err; } @@ -2319,11 +2401,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_disable(struct mlx5_eswitch *esw) { esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_all_reps(esw); mlx5_eswitch_disable_pf_vf_vports(esw); + esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); mlx5_rdma_disable_roce(esw->dev); + mutex_destroy(&esw->offloads.termtbl_mutex); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; } @@ -2403,10 +2486,8 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } -static int mlx5_devlink_eswitch_check(struct devlink *devlink) +static int mlx5_eswitch_check(const struct mlx5_core_dev *dev) { - struct mlx5_core_dev *dev = devlink_priv(devlink); - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return -EOPNOTSUPP; @@ -2427,7 +2508,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, u16 cur_mlx5_mode, mlx5_mode = 0; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; @@ -2452,7 +2533,7 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) struct mlx5_core_dev *dev = devlink_priv(devlink); int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; @@ -2467,7 +2548,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, int err, vport, num_vport; u8 mlx5_mode; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; @@ -2521,7 +2602,7 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; @@ -2536,7 +2617,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; @@ -2585,7 +2666,7 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; @@ -2593,6 +2674,21 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, return 0; } +static bool +mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num) +{ + /* Currently, only ECPF based device has representor for host PF. */ + if (vport_num == MLX5_VPORT_PF && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) + return false; + + if (vport_num == MLX5_VPORT_ECPF && + !mlx5_ecpf_vport_exists(esw->dev)) + return false; + + return true; +} + void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, const struct mlx5_eswitch_rep_ops *ops, u8 rep_type) @@ -2603,8 +2699,10 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type] = ops; mlx5_esw_for_all_reps(esw, i, rep) { - rep_data = &rep->rep_data[rep_type]; - atomic_set(&rep_data->state, REP_REGISTERED); + if (likely(mlx5_eswitch_vport_has_rep(esw, i))) { + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); + } } } EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); @@ -2664,15 +2762,53 @@ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) vport_num <= esw->dev->priv.sriov.max_vfs; } +bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED); +} +EXPORT_SYMBOL(mlx5_eswitch_reg_c1_loopback_enabled); + bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) { return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); } EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); -u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num) { - return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; + u32 vport_num_mask = GENMASK(ESW_VPORT_BITS - 1, 0); + u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0); + u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + u32 val; + + /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */ + WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS)); + + /* Trim vhca_id to ESW_VHCA_ID_BITS */ + vhca_id &= vhca_id_mask; + + /* Make sure pf and ecpf map to end of ESW_VPORT_BITS range so they + * don't overlap with VF numbers, and themselves, after trimming. + */ + WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) < + vport_num_mask - 1); + WARN_ON_ONCE((MLX5_VPORT_ECPF & vport_num_mask) < + vport_num_mask - 1); + WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) == + (MLX5_VPORT_ECPF & vport_num_mask)); + + /* Make sure that the VF vport_num fits ESW_VPORT_BITS and don't + * overlap with pf and ecpf. + */ + if (vport_num != MLX5_VPORT_UPLINK && + vport_num != MLX5_VPORT_ECPF) + WARN_ON_ONCE(vport_num >= vport_num_mask - 1); + + /* We can now trim vport_num to ESW_VPORT_BITS */ + vport_num &= vport_num_mask; + + val = (vhca_id << ESW_VPORT_BITS) | vport_num; + return val << (32 - ESW_SOURCE_PORT_METADATA_BITS); } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c index 8bfa53ea5dd8..1e275a8441de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c @@ -6,20 +6,25 @@ #include <linux/mlx5/fs.h> #include "eswitch_offloads_chains.h" +#include "en/mapping.h" #include "mlx5_core.h" #include "fs_core.h" #include "eswitch.h" #include "en.h" +#include "en_tc.h" #define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv) #define esw_chains_lock(esw) (esw_chains_priv(esw)->lock) #define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht) +#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping) #define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht) #define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left) #define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb) #define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb) #define fdb_ignore_flow_level_supported(esw) \ (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) +#define fdb_modify_header_fwd_to_table_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) /* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated @@ -43,6 +48,7 @@ struct mlx5_esw_chains_priv { struct mutex lock; struct mlx5_flow_table *tc_end_fdb; + struct mapping_ctx *chains_mapping; int fdb_left[ARRAY_SIZE(ESW_POOLS)]; }; @@ -53,9 +59,12 @@ struct fdb_chain { u32 chain; int ref; + int id; struct mlx5_eswitch *esw; struct list_head prios_list; + struct mlx5_flow_handle *restore_rule; + struct mlx5_modify_hdr *miss_modify_hdr; }; struct fdb_prio_key { @@ -100,7 +109,8 @@ bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw) bool mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw) { - return fdb_ignore_flow_level_supported(esw); + return mlx5_esw_chains_prios_supported(esw) && + fdb_ignore_flow_level_supported(esw); } u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw) @@ -261,6 +271,70 @@ mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw, mlx5_destroy_flow_table(fdb); } +static int +create_fdb_chain_restore(struct fdb_chain *fdb_chain) +{ + char modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)]; + struct mlx5_eswitch *esw = fdb_chain->esw; + struct mlx5_modify_hdr *mod_hdr; + u32 index; + int err; + + if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw)) + return 0; + + err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index); + if (err) + return err; + if (index == MLX5_FS_DEFAULT_FLOW_TAG) { + /* we got the special default flow tag id, so we won't know + * if we actually marked the packet with the restore rule + * we create. + * + * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. + */ + err = mapping_add(esw_chains_mapping(esw), + &fdb_chain->chain, &index); + mapping_remove(esw_chains_mapping(esw), + MLX5_FS_DEFAULT_FLOW_TAG); + if (err) + return err; + } + + fdb_chain->id = index; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield); + MLX5_SET(set_action_in, modact, offset, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8); + MLX5_SET(set_action_in, modact, length, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8); + MLX5_SET(set_action_in, modact, data, fdb_chain->id); + mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + 1, modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + fdb_chain->miss_modify_hdr = mod_hdr; + + fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id); + if (IS_ERR(fdb_chain->restore_rule)) { + err = PTR_ERR(fdb_chain->restore_rule); + goto err_rule; + } + + return 0; + +err_rule: + mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); +err_mod_hdr: + /* Datapath can't find this mapping, so we can safely remove it */ + mapping_remove(esw_chains_mapping(esw), fdb_chain->id); + return err; +} + static struct fdb_chain * mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) { @@ -275,6 +349,10 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) fdb_chain->chain = chain; INIT_LIST_HEAD(&fdb_chain->prios_list); + err = create_fdb_chain_restore(fdb_chain); + if (err) + goto err_restore; + err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node, chain_params); if (err) @@ -283,6 +361,12 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) return fdb_chain; err_insert: + if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) { + mlx5_del_flow_rules(fdb_chain->restore_rule); + mlx5_modify_header_dealloc(esw->dev, + fdb_chain->miss_modify_hdr); + } +err_restore: kvfree(fdb_chain); return ERR_PTR(err); } @@ -294,6 +378,15 @@ mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain) rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, chain_params); + + if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) { + mlx5_del_flow_rules(fdb_chain->restore_rule); + mlx5_modify_header_dealloc(esw->dev, + fdb_chain->miss_modify_hdr); + + mapping_remove(esw_chains_mapping(esw), fdb_chain->id); + } + kvfree(fdb_chain); } @@ -316,10 +409,11 @@ mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain) } static struct mlx5_flow_handle * -mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb, +mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain, + struct mlx5_flow_table *fdb, struct mlx5_flow_table *next_fdb) { - static const struct mlx5_flow_spec spec = {}; + struct mlx5_eswitch *esw = fdb_chain->esw; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act act = {}; @@ -328,7 +422,13 @@ mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb, dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = next_fdb; - return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1); + if (next_fdb == tc_end_fdb(esw) && + fdb_modify_header_fwd_to_table_supported(esw)) { + act.modify_hdr = fdb_chain->miss_modify_hdr; + act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + return mlx5_add_flow_rules(fdb, NULL, &act, &dest, 1); } static int @@ -351,7 +451,8 @@ mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio, list_for_each_entry_continue_reverse(pos, &fdb_chain->prios_list, list) { - miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb, + miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain, + pos->fdb, next_fdb); if (IS_ERR(miss_rules[n])) { err = PTR_ERR(miss_rules[n]); @@ -465,7 +566,7 @@ mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw, } /* Add miss rule to next_fdb */ - miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb); + miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb); if (IS_ERR(miss_rule)) { err = PTR_ERR(miss_rule); goto err_miss_rule; @@ -624,12 +725,43 @@ mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) return tc_end_fdb(esw); } +struct mlx5_flow_table * +mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw) +{ + int chain, prio, level, err; + + if (!fdb_ignore_flow_level_supported(esw)) { + err = -EOPNOTSUPP; + + esw_warn(esw->dev, + "Couldn't create global flow table, ignore_flow_level not supported."); + goto err_ignore; + } + + chain = mlx5_esw_chains_get_chain_range(esw), + prio = mlx5_esw_chains_get_prio_range(esw); + level = mlx5_esw_chains_get_level_range(esw); + + return mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); + +err_ignore: + return ERR_PTR(err); +} + +void +mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft) +{ + mlx5_esw_chains_destroy_fdb_table(esw, ft); +} + static int mlx5_esw_chains_init(struct mlx5_eswitch *esw) { struct mlx5_esw_chains_priv *chains_priv; struct mlx5_core_dev *dev = esw->dev; u32 max_flow_counter, fdb_max; + struct mapping_ctx *mapping; int err; chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); @@ -651,6 +783,13 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw) esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { + /* Disabled when ttl workaround is needed, e.g + * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig + */ + esw_warn(dev, + "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; } else { esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", @@ -666,10 +805,20 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw) if (err) goto init_prios_ht_err; + mapping = mapping_create(sizeof(u32), esw_get_max_restore_tag(esw), + true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto mapping_err; + } + esw_chains_mapping(esw) = mapping; + mutex_init(&esw_chains_lock(esw)); return 0; +mapping_err: + rhashtable_destroy(&esw_prios_ht(esw)); init_prios_ht_err: rhashtable_destroy(&esw_chains_ht(esw)); init_chains_ht_err: @@ -681,6 +830,7 @@ static void mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw) { mutex_destroy(&esw_chains_lock(esw)); + mapping_destroy(esw_chains_mapping(esw)); rhashtable_destroy(&esw_prios_ht(esw)); rhashtable_destroy(&esw_chains_ht(esw)); @@ -759,3 +909,30 @@ mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) mlx5_esw_chains_close(esw); mlx5_esw_chains_cleanup(esw); } + +int +mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, + u32 *chain_mapping) +{ + return mapping_add(esw_chains_mapping(esw), &chain, chain_mapping); +} + +int +mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, u32 chain_mapping) +{ + return mapping_remove(esw_chains_mapping(esw), chain_mapping); +} + +int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, + u32 *chain) +{ + int err; + + err = mapping_find(esw_chains_mapping(esw), tag, chain); + if (err) { + esw_warn(esw->dev, "Can't find chain for tag: %d\n", tag); + return -ENOENT; + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h index 4ae2baf2a7a1..f3b9ae6798f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h @@ -25,8 +25,23 @@ mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, struct mlx5_flow_table * mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw); +struct mlx5_flow_table * +mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw); +void +mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft); + +int +mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, + u32 *chain_mapping); +int +mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, + u32 chain_mapping); + int mlx5_esw_chains_create(struct mlx5_eswitch *esw); void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); -#endif /* __ML5_ESW_CHAINS_H__ */ +int +mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain); +#endif /* __ML5_ESW_CHAINS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index f3a925e5ba88..17a0d2bc102b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -3,6 +3,7 @@ #include <linux/mlx5/fs.h> #include "eswitch.h" +#include "fs_core.h" struct mlx5_termtbl_handle { struct hlist_node termtbl_hlist; @@ -28,6 +29,10 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, sizeof(dest->vport.num), hash); hash = jhash((const void *)&dest->vport.vhca_id, sizeof(dest->vport.num), hash); + if (dest->vport.pkt_reformat) + hash = jhash(dest->vport.pkt_reformat, + sizeof(*dest->vport.pkt_reformat), + hash); return hash; } @@ -37,11 +42,19 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, struct mlx5_flow_act *flow_act2, struct mlx5_flow_destination *dest2) { - return flow_act1->action != flow_act2->action || - dest1->vport.num != dest2->vport.num || - dest1->vport.vhca_id != dest2->vport.vhca_id || - memcmp(&flow_act1->vlan, &flow_act2->vlan, - sizeof(flow_act1->vlan)); + int ret; + + ret = flow_act1->action != flow_act2->action || + dest1->vport.num != dest2->vport.num || + dest1->vport.vhca_id != dest2->vport.vhca_id || + memcmp(&flow_act1->vlan, &flow_act2->vlan, + sizeof(flow_act1->vlan)); + if (ret) + return ret; + + return dest1->vport.pkt_reformat && dest2->vport.pkt_reformat ? + memcmp(dest1->vport.pkt_reformat, dest2->vport.pkt_reformat, + sizeof(*dest1->vport.pkt_reformat)) : 0; } static int @@ -49,7 +62,6 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, struct mlx5_termtbl_handle *tt, struct mlx5_flow_act *flow_act) { - static const struct mlx5_flow_spec spec = {}; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *root_ns; int err; @@ -63,7 +75,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, /* As this is the terminating action then the termination table is the * same prio as the slow path */ - ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION; + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; ft_attr.prio = FDB_SLOW_PATH; ft_attr.max_fte = 1; ft_attr.autogroup.max_num_groups = 1; @@ -73,9 +86,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, return -EOPNOTSUPP; } - tt->rule = mlx5_add_flow_rules(tt->termtbl, &spec, flow_act, + tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act, &tt->dest, 1); - if (IS_ERR(tt->rule)) { esw_warn(dev, "Failed to create termination table rule\n"); goto add_flow_err; @@ -93,7 +105,8 @@ add_flow_err: static struct mlx5_termtbl_handle * mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, struct mlx5_flow_act *flow_act, - struct mlx5_flow_destination *dest) + struct mlx5_flow_destination *dest, + struct mlx5_esw_flow_attr *attr) { struct mlx5_termtbl_handle *tt; bool found = false; @@ -101,7 +114,6 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, int err; mutex_lock(&esw->offloads.termtbl_mutex); - hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest); hash_for_each_possible(esw->offloads.termtbl_tbl, tt, termtbl_hlist, hash_key) { @@ -123,6 +135,7 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; tt->dest.vport.num = dest->vport.num; tt->dest.vport.vhca_id = dest->vport.vhca_id; + tt->dest.vport.flags = dest->vport.flags; memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); @@ -157,25 +170,44 @@ mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, } } +static bool mlx5_eswitch_termtbl_is_encap_reformat(struct mlx5_pkt_reformat *rt) +{ + switch (rt->reformat_type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + return true; + default: + return false; + } +} + static void mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, struct mlx5_flow_act *dst) { - if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)) - return; - - src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; - dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; - memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); - memset(&src->vlan[0], 0, sizeof(src->vlan[0])); - - if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) - return; + if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); + memset(&src->vlan[0], 0, sizeof(src->vlan[0])); + + if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); + memset(&src->vlan[1], 0, sizeof(src->vlan[1])); + } + } - src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; - dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; - memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); - memset(&src->vlan[1], 0, sizeof(src->vlan[1])); + if (src->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT && + mlx5_eswitch_termtbl_is_encap_reformat(src->pkt_reformat)) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + dst->pkt_reformat = src->pkt_reformat; + src->pkt_reformat = NULL; + } } static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, @@ -196,15 +228,27 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec) { - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table)) + int i; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || + attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH || + !mlx5_eswitch_offload_is_uplink_port(esw, spec)) return false; /* push vlan on RX */ - return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) && - mlx5_eswitch_offload_is_uplink_port(esw, spec); + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) + return true; + + /* hairpin */ + for (i = attr->split_count; i < attr->out_count; i++) + if (attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) + return true; + + return false; } struct mlx5_flow_handle * @@ -234,7 +278,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, /* get the terminating table for the action list */ tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, - &dest[i]); + &dest[i], attr); if (IS_ERR(tt)) { esw_warn(esw->dev, "Failed to create termination table\n"); goto revert_changes; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4e627e685a02..c93bd55fab06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -111,8 +111,8 @@ #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) #define OFFLOADS_MAX_FT 2 -#define OFFLOADS_NUM_PRIOS 1 -#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) +#define OFFLOADS_NUM_PRIOS 2 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS) #define LAG_PRIO_NUM_LEVELS 1 #define LAG_NUM_PRIOS 1 @@ -1892,12 +1892,16 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, int num_dest) { struct mlx5_flow_root_namespace *root = find_root(&ft->node); + static const struct mlx5_flow_spec zero_spec = {}; struct mlx5_flow_destination gen_dest = {}; struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_handle *handle = NULL; u32 sw_action = flow_act->action; struct fs_prio *prio; + if (!spec) + spec = &zero_spec; + fs_get_obj(prio, ft->node.parent); if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!fwd_next_prio_supported(ft)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index f899da9f8488..4b323a7ae794 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -965,6 +965,24 @@ struct mlx5dr_action *mlx5dr_action_create_drop(void) } struct mlx5dr_action * +mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + return NULL; + + action->dest_tbl.is_fw_tbl = true; + action->dest_tbl.fw_tbl.dmn = dmn; + action->dest_tbl.fw_tbl.id = table_num; + action->dest_tbl.fw_tbl.type = FS_FT_FDB; + refcount_inc(&dmn->refcount); + + return action; +} + +struct mlx5dr_action * mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl) { struct mlx5dr_action *action; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index d12d3a2d46ab..3b3f5b9d4f95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -384,6 +384,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { list_for_each_entry(dst, &fte->node.children, node.list) { enum mlx5_flow_destination_type type = dst->dest_attr.type; + u32 ft_id; if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) { @@ -420,6 +421,17 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, num_term_actions++; break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + ft_id = dst->dest_attr.ft_num; + tmp_action = mlx5dr_action_create_dest_table_num(domain, + ft_id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + break; default: err = -EOPNOTSUPP; goto free_actions; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index e09e4ea1b045..7deaca9ade3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -38,8 +38,6 @@ struct mlx5dr_action_dest { struct mlx5dr_action *reformat; }; -#ifdef CONFIG_MLX5_SW_STEERING - struct mlx5dr_domain * mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); @@ -77,6 +75,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, struct mlx5dr_action *action); struct mlx5dr_action * +mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num); + +struct mlx5dr_action * mlx5dr_action_create_dest_table(struct mlx5dr_table *table); struct mlx5dr_action * @@ -125,103 +126,4 @@ mlx5dr_is_supported(struct mlx5_core_dev *dev) return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner); } -#else /* CONFIG_MLX5_SW_STEERING */ - -static inline struct mlx5dr_domain * -mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) { return NULL; } - -static inline int -mlx5dr_domain_destroy(struct mlx5dr_domain *domain) { return 0; } - -static inline int -mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags) { return 0; } - -static inline void -mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, - struct mlx5dr_domain *peer_dmn) { } - -static inline struct mlx5dr_table * -mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags) { return NULL; } - -static inline int -mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; } - -static inline u32 -mlx5dr_table_get_id(struct mlx5dr_table *table) { return 0; } - -static inline struct mlx5dr_matcher * -mlx5dr_matcher_create(struct mlx5dr_table *table, - u32 priority, - u8 match_criteria_enable, - struct mlx5dr_match_parameters *mask) { return NULL; } - -static inline int -mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) { return 0; } - -static inline struct mlx5dr_rule * -mlx5dr_rule_create(struct mlx5dr_matcher *matcher, - struct mlx5dr_match_parameters *value, - size_t num_actions, - struct mlx5dr_action *actions[]) { return NULL; } - -static inline int -mlx5dr_rule_destroy(struct mlx5dr_rule *rule) { return 0; } - -static inline int -mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, - struct mlx5dr_action *action) { return 0; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain, - struct mlx5_flow_table *ft) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, - u32 vport, u8 vhca_id_valid, - u16 vhca_id) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, - struct mlx5dr_action_dest *dests, - u32 num_of_dests) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_drop(void) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_tag(u32 tag_value) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_flow_counter(u32 counter_id) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, - enum mlx5dr_action_reformat_type reformat_type, - size_t data_sz, - void *data) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain, - u32 flags, - size_t actions_sz, - __be64 actions[]) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_pop_vlan(void) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, - __be32 vlan_hdr) { return NULL; } - -static inline int -mlx5dr_action_destroy(struct mlx5dr_action *action) { return 0; } - -static inline bool -mlx5dr_is_supported(struct mlx5_core_dev *dev) { return false; } - -#endif /* CONFIG_MLX5_SW_STEERING */ - #endif /* _MLX5DR_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 1f2e6db743e1..c713bc22da7d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -1238,6 +1238,59 @@ err_append_allocated_mirror: } EXPORT_SYMBOL(mlxsw_afa_block_append_mirror); +/* QoS Action + * ---------- + * The QOS_ACTION is used for manipulating the QoS attributes of a packet. It + * can be used to change the DCSP, ECN, Color and Switch Priority of the packet. + * Note that PCP field can be changed using the VLAN action. + */ + +#define MLXSW_AFA_QOS_CODE 0x06 +#define MLXSW_AFA_QOS_SIZE 1 + +enum mlxsw_afa_qos_cmd { + /* Do nothing */ + MLXSW_AFA_QOS_CMD_NOP, + /* Set a field */ + MLXSW_AFA_QOS_CMD_SET, +}; + +/* afa_qos_switch_prio_cmd + * Switch Priority command as per mlxsw_afa_qos_cmd. + */ +MLXSW_ITEM32(afa, qos, switch_prio_cmd, 0x08, 14, 2); + +/* afa_qos_switch_prio + * Switch Priority. + */ +MLXSW_ITEM32(afa, qos, switch_prio, 0x08, 0, 4); + +static inline void +mlxsw_afa_qos_switch_prio_pack(char *payload, + enum mlxsw_afa_qos_cmd prio_cmd, u8 prio) +{ + mlxsw_afa_qos_switch_prio_cmd_set(payload, prio_cmd); + mlxsw_afa_qos_switch_prio_set(payload, prio); +} + +int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block, + u8 prio, + struct netlink_ext_ack *extack) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_QOS_CODE, + MLXSW_AFA_QOS_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append QOS action"); + return PTR_ERR(act); + } + mlxsw_afa_qos_switch_prio_pack(act, MLXSW_AFA_QOS_CMD_SET, + prio); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_switch_prio); + /* Forwarding Action * ----------------- * Forwarding Action can be used to implement Policy Based Switching (PBS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 5f4c1e505136..2125d7d6bcb0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -62,6 +62,9 @@ int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, u16 vid, u8 pcp, u8 et, struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block, + u8 prio, + struct netlink_ext_ack *extack); int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block, u32 counter_index); int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 6534184cb942..d62496ef299c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -18,6 +18,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_CQE_V1, MLXSW_RES_ID_CQE_V2, MLXSW_RES_ID_COUNTER_POOL_SIZE, + MLXSW_RES_ID_COUNTER_BANK_SIZE, MLXSW_RES_ID_MAX_SPAN, MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, @@ -75,6 +76,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_CQE_V1] = 0x2211, [MLXSW_RES_ID_CQE_V2] = 0x2212, [MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410, + [MLXSW_RES_ID_COUNTER_BANK_SIZE] = 0x2411, [MLXSW_RES_ID_MAX_SPAN] = 0x2420, [MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443, [MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC] = 0x2449, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 51709012593e..35d3a68ef4fd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -5421,8 +5421,13 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_span_register; + err = mlxsw_sp_counter_resources_register(mlxsw_core); + if (err) + goto err_resources_counter_register; + return 0; +err_resources_counter_register: err_resources_span_register: devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); return err; @@ -5440,8 +5445,13 @@ static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_span_register; + err = mlxsw_sp_counter_resources_register(mlxsw_core); + if (err) + goto err_resources_counter_register; + return 0; +err_resources_counter_register: err_resources_span_register: devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 81801c6fb941..bbd8bec8fee4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -46,6 +46,10 @@ #define MLXSW_SP_RESOURCE_NAME_SPAN "span_agents" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS "counters" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS_FLOW "flow" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF "rif" + enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD = 1, MLXSW_SP_RESOURCE_KVD_LINEAR, @@ -55,6 +59,9 @@ enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, MLXSW_SP_RESOURCE_SPAN, + MLXSW_SP_RESOURCE_COUNTERS, + MLXSW_SP_RESOURCE_COUNTERS_FLOW, + MLXSW_SP_RESOURCE_COUNTERS_RIF, }; struct mlxsw_sp_port; @@ -739,6 +746,9 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u32 action, u16 vid, u16 proto, u8 prio, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 prio, struct netlink_ext_ack *extack); int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 6f8d5005ff36..01324d002680 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -638,6 +638,23 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, } } +int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 prio, struct netlink_ext_ack *extack) +{ + /* Even though both Linux and Spectrum switches support 16 priorities, + * spectrum_qdisc only processes the first eight priomap elements, and + * the DCB and PFC features are tied to 8 priorities as well. Therefore + * bounce attempts to prioritize packets to higher priorities. + */ + if (prio >= IEEE_8021QAZ_MAX_TCS) { + NL_SET_ERR_MSG_MOD(extack, "Only priorities 0..7 are supported"); + return -EINVAL; + } + return mlxsw_afa_block_append_qos_switch_prio(rulei->act_block, prio, + extack); +} + int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct netlink_ext_ack *extack) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c index 6a02ef9ec00e..7974982533b5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c @@ -7,91 +7,143 @@ #include "spectrum_cnt.h" -#define MLXSW_SP_COUNTER_POOL_BANK_SIZE 4096 - struct mlxsw_sp_counter_sub_pool { + u64 size; unsigned int base_index; - unsigned int size; + enum mlxsw_res_id entry_size_res_id; + const char *resource_name; /* devlink resource name */ + u64 resource_id; /* devlink resource id */ unsigned int entry_size; unsigned int bank_count; + atomic_t active_entries_count; }; struct mlxsw_sp_counter_pool { - unsigned int pool_size; + u64 pool_size; unsigned long *usage; /* Usage bitmap */ spinlock_t counter_pool_lock; /* Protects counter pool allocations */ - struct mlxsw_sp_counter_sub_pool *sub_pools; + atomic_t active_entries_count; + unsigned int sub_pools_count; + struct mlxsw_sp_counter_sub_pool sub_pools[]; }; -static struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = { +static const struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = { [MLXSW_SP_COUNTER_SUB_POOL_FLOW] = { + .entry_size_res_id = MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, + .resource_name = MLXSW_SP_RESOURCE_NAME_COUNTERS_FLOW, + .resource_id = MLXSW_SP_RESOURCE_COUNTERS_FLOW, .bank_count = 6, }, [MLXSW_SP_COUNTER_SUB_POOL_RIF] = { + .entry_size_res_id = MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, + .resource_name = MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF, + .resource_id = MLXSW_SP_RESOURCE_COUNTERS_RIF, .bank_count = 2, } }; -static int mlxsw_sp_counter_pool_validate(struct mlxsw_sp *mlxsw_sp) +static u64 mlxsw_sp_counter_sub_pool_occ_get(void *priv) +{ + const struct mlxsw_sp_counter_sub_pool *sub_pool = priv; + + return atomic_read(&sub_pool->active_entries_count); +} + +static int mlxsw_sp_counter_sub_pools_init(struct mlxsw_sp *mlxsw_sp) { - unsigned int total_bank_config = 0; - unsigned int pool_size; + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int base_index = 0; + enum mlxsw_res_id res_id; + int err; int i; - pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); - /* Check config is valid, no bank over subscription */ - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) - total_bank_config += mlxsw_sp_counter_sub_pools[i].bank_count; - if (total_bank_config > pool_size / MLXSW_SP_COUNTER_POOL_BANK_SIZE + 1) - return -EINVAL; + for (i = 0; i < pool->sub_pools_count; i++) { + sub_pool = &pool->sub_pools[i]; + res_id = sub_pool->entry_size_res_id; + + if (!mlxsw_core_res_valid(mlxsw_sp->core, res_id)) + return -EIO; + sub_pool->entry_size = mlxsw_core_res_get(mlxsw_sp->core, + res_id); + err = devlink_resource_size_get(devlink, + sub_pool->resource_id, + &sub_pool->size); + if (err) + goto err_resource_size_get; + + devlink_resource_occ_get_register(devlink, + sub_pool->resource_id, + mlxsw_sp_counter_sub_pool_occ_get, + sub_pool); + + sub_pool->base_index = base_index; + base_index += sub_pool->size; + atomic_set(&sub_pool->active_entries_count, 0); + } return 0; + +err_resource_size_get: + for (i--; i >= 0; i--) { + sub_pool = &pool->sub_pools[i]; + + devlink_resource_occ_get_unregister(devlink, + sub_pool->resource_id); + } + return err; } -static int mlxsw_sp_counter_sub_pools_prepare(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_counter_sub_pools_fini(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_sp_counter_sub_pool *sub_pool; + int i; - /* Prepare generic flow pool*/ - sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_FLOW]; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_PACKETS_BYTES)) - return -EIO; - sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, - COUNTER_SIZE_PACKETS_BYTES); - /* Prepare erif pool*/ - sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_RIF]; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_ROUTER_BASIC)) - return -EIO; - sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, - COUNTER_SIZE_ROUTER_BASIC); - return 0; + for (i = 0; i < pool->sub_pools_count; i++) { + sub_pool = &pool->sub_pools[i]; + + WARN_ON(atomic_read(&sub_pool->active_entries_count)); + devlink_resource_occ_get_unregister(devlink, + sub_pool->resource_id); + } +} + +static u64 mlxsw_sp_counter_pool_occ_get(void *priv) +{ + const struct mlxsw_sp_counter_pool *pool = priv; + + return atomic_read(&pool->active_entries_count); } int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp) { + unsigned int sub_pools_count = ARRAY_SIZE(mlxsw_sp_counter_sub_pools); + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_sp_counter_sub_pool *sub_pool; struct mlxsw_sp_counter_pool *pool; - unsigned int base_index; unsigned int map_size; - int i; int err; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_POOL_SIZE)) - return -EIO; - - err = mlxsw_sp_counter_pool_validate(mlxsw_sp); - if (err) - return err; - - err = mlxsw_sp_counter_sub_pools_prepare(mlxsw_sp); - if (err) - return err; - - pool = kzalloc(sizeof(*pool), GFP_KERNEL); + pool = kzalloc(struct_size(pool, sub_pools, sub_pools_count), + GFP_KERNEL); if (!pool) return -ENOMEM; + mlxsw_sp->counter_pool = pool; + memcpy(pool->sub_pools, mlxsw_sp_counter_sub_pools, + sub_pools_count * sizeof(*sub_pool)); + pool->sub_pools_count = sub_pools_count; spin_lock_init(&pool->counter_pool_lock); + atomic_set(&pool->active_entries_count, 0); + + err = devlink_resource_size_get(devlink, MLXSW_SP_RESOURCE_COUNTERS, + &pool->pool_size); + if (err) + goto err_pool_resource_size_get; + devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_COUNTERS, + mlxsw_sp_counter_pool_occ_get, pool); - pool->pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); map_size = BITS_TO_LONGS(pool->pool_size) * sizeof(unsigned long); pool->usage = kzalloc(map_size, GFP_KERNEL); @@ -100,26 +152,18 @@ int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp) goto err_usage_alloc; } - pool->sub_pools = mlxsw_sp_counter_sub_pools; - /* Allocation is based on bank count which should be - * specified for each sub pool statically. - */ - base_index = 0; - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) { - sub_pool = &pool->sub_pools[i]; - sub_pool->size = sub_pool->bank_count * - MLXSW_SP_COUNTER_POOL_BANK_SIZE; - sub_pool->base_index = base_index; - base_index += sub_pool->size; - /* The last bank can't be fully used */ - if (sub_pool->base_index + sub_pool->size > pool->pool_size) - sub_pool->size = pool->pool_size - sub_pool->base_index; - } + err = mlxsw_sp_counter_sub_pools_init(mlxsw_sp); + if (err) + goto err_sub_pools_init; - mlxsw_sp->counter_pool = pool; return 0; +err_sub_pools_init: + kfree(pool->usage); err_usage_alloc: + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_COUNTERS); +err_pool_resource_size_get: kfree(pool); return err; } @@ -127,10 +171,15 @@ err_usage_alloc: void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_counter_sub_pools_fini(mlxsw_sp); WARN_ON(find_first_bit(pool->usage, pool->pool_size) != pool->pool_size); + WARN_ON(atomic_read(&pool->active_entries_count)); kfree(pool->usage); + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_COUNTERS); kfree(pool); } @@ -144,7 +193,7 @@ int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int stop_index; int i, err; - sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + sub_pool = &pool->sub_pools[sub_pool_id]; stop_index = sub_pool->base_index + sub_pool->size; entry_index = sub_pool->base_index; @@ -166,6 +215,8 @@ int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, spin_unlock(&pool->counter_pool_lock); *p_counter_index = entry_index; + atomic_add(sub_pool->entry_size, &sub_pool->active_entries_count); + atomic_add(sub_pool->entry_size, &pool->active_entries_count); return 0; err_alloc: @@ -183,9 +234,77 @@ void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(counter_index >= pool->pool_size)) return; - sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + sub_pool = &pool->sub_pools[sub_pool_id]; spin_lock(&pool->counter_pool_lock); for (i = 0; i < sub_pool->entry_size; i++) __clear_bit(counter_index + i, pool->usage); spin_unlock(&pool->counter_pool_lock); + atomic_sub(sub_pool->entry_size, &sub_pool->active_entries_count); + atomic_sub(sub_pool->entry_size, &pool->active_entries_count); +} + +int mlxsw_sp_counter_resources_register(struct mlxsw_core *mlxsw_core) +{ + static struct devlink_resource_size_params size_params; + struct devlink *devlink = priv_to_devlink(mlxsw_core); + const struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int total_bank_config; + u64 sub_pool_size; + u64 base_index; + u64 pool_size; + u64 bank_size; + int err; + int i; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_POOL_SIZE) || + !MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_BANK_SIZE)) + return -EIO; + + pool_size = MLXSW_CORE_RES_GET(mlxsw_core, COUNTER_POOL_SIZE); + bank_size = MLXSW_CORE_RES_GET(mlxsw_core, COUNTER_BANK_SIZE); + + devlink_resource_size_params_init(&size_params, pool_size, + pool_size, bank_size, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devlink_resource_register(devlink, + MLXSW_SP_RESOURCE_NAME_COUNTERS, + pool_size, + MLXSW_SP_RESOURCE_COUNTERS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); + if (err) + return err; + + /* Allocation is based on bank count which should be + * specified for each sub pool statically. + */ + total_bank_config = 0; + base_index = 0; + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) { + sub_pool = &mlxsw_sp_counter_sub_pools[i]; + sub_pool_size = sub_pool->bank_count * bank_size; + /* The last bank can't be fully used */ + if (base_index + sub_pool_size > pool_size) + sub_pool_size = pool_size - base_index; + base_index += sub_pool_size; + + devlink_resource_size_params_init(&size_params, sub_pool_size, + sub_pool_size, bank_size, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devlink_resource_register(devlink, + sub_pool->resource_name, + sub_pool_size, + sub_pool->resource_id, + MLXSW_SP_RESOURCE_COUNTERS, + &size_params); + if (err) + return err; + total_bank_config += sub_pool->bank_count; + } + + /* Check config is valid, no bank over subscription */ + if (WARN_ON(total_bank_config > div64_u64(pool_size, bank_size) + 1)) + return -EINVAL; + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h index 81465e267b10..a68d931090dd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h @@ -4,6 +4,7 @@ #ifndef _MLXSW_SPECTRUM_CNT_H #define _MLXSW_SPECTRUM_CNT_H +#include "core.h" #include "spectrum.h" enum mlxsw_sp_counter_sub_pool_id { @@ -19,5 +20,6 @@ void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index); int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_counter_resources_register(struct mlxsw_core *mlxsw_core); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 88aa554415df..3d3cad3784e5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -26,17 +26,17 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (!flow_action_has_entries(flow_action)) return 0; - if (!flow_action_mixed_hw_stats_types_check(flow_action, extack)) + if (!flow_action_mixed_hw_stats_check(flow_action, extack)) return -EOPNOTSUPP; act = flow_action_first_entry_get(flow_action); - if (act->hw_stats_type == FLOW_ACTION_HW_STATS_TYPE_ANY || - act->hw_stats_type == FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE) { + if (act->hw_stats == FLOW_ACTION_HW_STATS_ANY || + act->hw_stats == FLOW_ACTION_HW_STATS_IMMEDIATE) { /* Count action is inserted first */ err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack); if (err) return err; - } else if (act->hw_stats_type != FLOW_ACTION_HW_STATS_TYPE_DISABLED) { + } else if (act->hw_stats != FLOW_ACTION_HW_STATS_DISABLED) { NL_SET_ERR_MSG_MOD(extack, "Unsupported action HW stats type"); return -EOPNOTSUPP; } @@ -154,6 +154,10 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, act->id, vid, proto, prio, extack); } + case FLOW_ACTION_PRIORITY: + return mlxsw_sp_acl_rulei_act_priority(mlxsw_sp, rulei, + act->priority, + extack); default: NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index b9f429ec0db4..670a43fe2a00 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -323,7 +323,7 @@ mlxsw_sp_qdisc_get_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, - u32 probability, bool is_ecn) + u32 probability, bool is_wred, bool is_ecn) { char cwtpm_cmd[MLXSW_REG_CWTPM_LEN]; char cwtp_cmd[MLXSW_REG_CWTP_LEN]; @@ -341,7 +341,7 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, return err; mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num, - MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn); + MLXSW_REG_CWTP_DEFAULT_PROFILE, is_wred, is_ecn); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd); } @@ -445,8 +445,9 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, prob = DIV_ROUND_UP(prob, 1 << 16); min = mlxsw_sp_bytes_cells(mlxsw_sp, p->min); max = mlxsw_sp_bytes_cells(mlxsw_sp, p->max); - return mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, min, - max, prob, p->is_ecn); + return mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, + min, max, prob, + !p->is_nodrop, p->is_ecn); } static void diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 9c300d625e04..727f6ef243df 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -132,23 +132,23 @@ static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port, #define MLXSW_SP_TRAP_DROP(_id, _group_id) \ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ MLXSW_SP_TRAP_METADATA) #define MLXSW_SP_TRAP_DROP_EXT(_id, _group_id, _metadata) \ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ MLXSW_SP_TRAP_METADATA | (_metadata)) #define MLXSW_SP_TRAP_DRIVER_DROP(_id, _group_id) \ DEVLINK_TRAP_DRIVER(DROP, DROP, DEVLINK_MLXSW_TRAP_ID_##_id, \ DEVLINK_MLXSW_TRAP_NAME_##_id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ MLXSW_SP_TRAP_METADATA) #define MLXSW_SP_TRAP_EXCEPTION(_id, _group_id) \ DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ MLXSW_SP_TRAP_METADATA) #define MLXSW_SP_RXL_DISCARD(_id, _group_id) \ @@ -165,6 +165,13 @@ static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port, MLXSW_RXL(mlxsw_sp_rx_exception_listener, _id, \ _action, false, SP_##_group_id, SET_FW_DEFAULT) +static const struct devlink_trap_group mlxsw_sp_trap_groups_arr[] = { + DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS), + DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS), + DEVLINK_TRAP_GROUP_GENERIC(TUNNEL_DROPS), + DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS), +}; + static const struct devlink_trap mlxsw_sp_traps_arr[] = { MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS), MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), @@ -318,6 +325,7 @@ static int mlxsw_sp_trap_dummy_group_init(struct mlxsw_sp *mlxsw_sp) int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp) { + size_t groups_count = ARRAY_SIZE(mlxsw_sp_trap_groups_arr); struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int err; @@ -333,17 +341,33 @@ int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp) ARRAY_SIZE(mlxsw_sp_listeners_arr))) return -EINVAL; - return devlink_traps_register(devlink, mlxsw_sp_traps_arr, - ARRAY_SIZE(mlxsw_sp_traps_arr), - mlxsw_sp); + err = devlink_trap_groups_register(devlink, mlxsw_sp_trap_groups_arr, + groups_count); + if (err) + return err; + + err = devlink_traps_register(devlink, mlxsw_sp_traps_arr, + ARRAY_SIZE(mlxsw_sp_traps_arr), mlxsw_sp); + if (err) + goto err_traps_register; + + return 0; + +err_traps_register: + devlink_trap_groups_unregister(devlink, mlxsw_sp_trap_groups_arr, + groups_count); + return err; } void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp) { + size_t groups_count = ARRAY_SIZE(mlxsw_sp_trap_groups_arr); struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); devlink_traps_unregister(devlink, mlxsw_sp_traps_arr, ARRAY_SIZE(mlxsw_sp_traps_arr)); + devlink_trap_groups_unregister(devlink, mlxsw_sp_trap_groups_arr, + groups_count); } int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 06f9d013f807..dc0e27328661 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2183,24 +2183,29 @@ static int ocelot_init_timestamp(struct ocelot *ocelot) return 0; } -static void ocelot_port_set_mtu(struct ocelot *ocelot, int port, size_t mtu) +/* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu. + * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG. + */ +static void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + int maxlen = sdu + ETH_HLEN + ETH_FCS_LEN; int atop_wm; - ocelot_port_writel(ocelot_port, mtu, DEV_MAC_MAXLEN_CFG); + ocelot_port_writel(ocelot_port, maxlen, DEV_MAC_MAXLEN_CFG); /* Set Pause WM hysteresis - * 152 = 6 * mtu / OCELOT_BUFFER_CELL_SZ - * 101 = 4 * mtu / OCELOT_BUFFER_CELL_SZ + * 152 = 6 * maxlen / OCELOT_BUFFER_CELL_SZ + * 101 = 4 * maxlen / OCELOT_BUFFER_CELL_SZ */ ocelot_write_rix(ocelot, SYS_PAUSE_CFG_PAUSE_ENA | SYS_PAUSE_CFG_PAUSE_STOP(101) | SYS_PAUSE_CFG_PAUSE_START(152), SYS_PAUSE_CFG, port); /* Tail dropping watermark */ - atop_wm = (ocelot->shared_queue_sz - 9 * mtu) / OCELOT_BUFFER_CELL_SZ; - ocelot_write_rix(ocelot, ocelot_wm_enc(9 * mtu), + atop_wm = (ocelot->shared_queue_sz - 9 * maxlen) / + OCELOT_BUFFER_CELL_SZ; + ocelot_write_rix(ocelot, ocelot_wm_enc(9 * maxlen), SYS_ATOP, port); ocelot_write(ocelot, ocelot_wm_enc(atop_wm), SYS_ATOP_TOT_CFG); } @@ -2229,9 +2234,10 @@ void ocelot_init_port(struct ocelot *ocelot, int port) DEV_MAC_HDX_CFG); /* Set Max Length and maximum tags allowed */ - ocelot_port_set_mtu(ocelot, port, VLAN_ETH_FRAME_LEN); + ocelot_port_set_maxlen(ocelot, port, ETH_DATA_LEN); ocelot_port_writel(ocelot_port, DEV_MAC_TAGS_CFG_TAG_ID(ETH_P_8021AD) | DEV_MAC_TAGS_CFG_VLAN_AWR_ENA | + DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA | DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA, DEV_MAC_TAGS_CFG); @@ -2324,18 +2330,18 @@ void ocelot_configure_cpu(struct ocelot *ocelot, int npi, ANA_PORT_PORT_CFG, cpu); if (npi >= 0 && npi < ocelot->num_phys_ports) { - int mtu = VLAN_ETH_FRAME_LEN + OCELOT_TAG_LEN; + int sdu = ETH_DATA_LEN + OCELOT_TAG_LEN; ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M | QSYS_EXT_CPU_CFG_EXT_CPU_PORT(npi), QSYS_EXT_CPU_CFG); if (injection == OCELOT_TAG_PREFIX_SHORT) - mtu += OCELOT_SHORT_PREFIX_LEN; + sdu += OCELOT_SHORT_PREFIX_LEN; else if (injection == OCELOT_TAG_PREFIX_LONG) - mtu += OCELOT_LONG_PREFIX_LEN; + sdu += OCELOT_LONG_PREFIX_LEN; - ocelot_port_set_mtu(ocelot, npi, mtu); + ocelot_port_set_maxlen(ocelot, npi, sdu); /* Enable NPI port */ ocelot_write_rix(ocelot, diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 6d84173373c7..873a9944fbfb 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -17,8 +17,8 @@ static int ocelot_flower_parse_action(struct flow_cls_offload *f, if (!flow_offload_has_one_action(&f->rule->action)) return -EOPNOTSUPP; - if (!flow_action_basic_hw_stats_types_check(&f->rule->action, - f->common.extack)) + if (!flow_action_basic_hw_stats_check(&f->rule->action, + f->common.extack)) return -EOPNOTSUPP; flow_action_for_each(i, a, &f->rule->action) { diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 2ee0d0be113a..2616fd735aab 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1920,6 +1920,7 @@ myri10ge_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) } static const struct ethtool_ops myri10ge_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = myri10ge_get_drvinfo, .get_coalesce = myri10ge_get_coalesce, .set_coalesce = myri10ge_set_coalesce, diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 4aa7346cb040..1c76e1592ca2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -1207,8 +1207,8 @@ int nfp_flower_compile_action(struct nfp_app *app, bool pkt_host = false; u32 csum_updated = 0; - if (!flow_action_basic_hw_stats_types_check(&flow->rule->action, - extack)) + if (!flow_action_hw_stats_check(&flow->rule->action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index c50fce42f473..07dbf4d72227 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -211,7 +211,7 @@ static const struct nfp_devlink_versions { enum nfp_nsp_versions id; const char *key; } nfp_devlink_versions_nsp[] = { - { NFP_VERSIONS_BUNDLE, "fw.bundle_id", }, + { NFP_VERSIONS_BUNDLE, DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, }, { NFP_VERSIONS_BSP, DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, }, { NFP_VERSIONS_CPLD, "fw.cpld", }, { NFP_VERSIONS_APP, DEVLINK_INFO_VERSION_GENERIC_FW_APP, }, diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 49c7987c2abd..2fdd0753b3af 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -1019,27 +1019,6 @@ static int nixge_ethtools_set_coalesce(struct net_device *ndev, return -EBUSY; } - if (ecoalesce->rx_coalesce_usecs || - ecoalesce->rx_coalesce_usecs_irq || - ecoalesce->rx_max_coalesced_frames_irq || - ecoalesce->tx_coalesce_usecs || - ecoalesce->tx_coalesce_usecs_irq || - ecoalesce->tx_max_coalesced_frames_irq || - ecoalesce->stats_block_coalesce_usecs || - ecoalesce->use_adaptive_rx_coalesce || - ecoalesce->use_adaptive_tx_coalesce || - ecoalesce->pkt_rate_low || - ecoalesce->rx_coalesce_usecs_low || - ecoalesce->rx_max_coalesced_frames_low || - ecoalesce->tx_coalesce_usecs_low || - ecoalesce->tx_max_coalesced_frames_low || - ecoalesce->pkt_rate_high || - ecoalesce->rx_coalesce_usecs_high || - ecoalesce->rx_max_coalesced_frames_high || - ecoalesce->tx_coalesce_usecs_high || - ecoalesce->tx_max_coalesced_frames_high || - ecoalesce->rate_sample_interval) - return -EOPNOTSUPP; if (ecoalesce->rx_max_coalesced_frames) priv->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; if (ecoalesce->tx_max_coalesced_frames) @@ -1083,6 +1062,7 @@ static int nixge_ethtools_set_phys_id(struct net_device *ndev, } static const struct ethtool_ops nixge_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = nixge_ethtools_get_drvinfo, .get_coalesce = nixge_ethtools_get_coalesce, .set_coalesce = nixge_ethtools_set_coalesce, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c index bc03cecf80cc..5f8fc58d42b3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c @@ -228,7 +228,13 @@ DEFINE_SHOW_ATTRIBUTE(netdev); void ionic_debugfs_add_lif(struct ionic_lif *lif) { - lif->dentry = debugfs_create_dir(lif->name, lif->ionic->dentry); + struct dentry *lif_dentry; + + lif_dentry = debugfs_create_dir(lif->name, lif->ionic->dentry); + if (IS_ERR_OR_NULL(lif_dentry)) + return; + lif->dentry = lif_dentry; + debugfs_create_file("netdev", 0400, lif->dentry, lif->netdev, &netdev_fops); } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index ed14164468a1..273c889faaad 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -77,12 +77,16 @@ int ionic_devlink_register(struct ionic *ionic) return err; } + /* don't register the mgmt_nic as a port */ + if (ionic->is_mgmt_nic) + return 0; + devlink_port_attrs_set(&ionic->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, 0, false, 0, NULL, 0); err = devlink_port_register(dl, &ionic->dl_port, 0); if (err) dev_err(ionic->dev, "devlink_port_register failed: %d\n", err); - else if (!ionic->is_mgmt_nic) + else devlink_port_type_eth_set(&ionic->dl_port, ionic->master_lif->netdev); @@ -93,6 +97,7 @@ void ionic_devlink_unregister(struct ionic *ionic) { struct devlink *dl = priv_to_devlink(ionic); - devlink_port_unregister(&ionic->dl_port); + if (ionic->dl_port.registered) + devlink_port_unregister(&ionic->dl_port); devlink_unregister(dl); } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index a233716eac29..6996229facfd 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -3,6 +3,7 @@ #include <linux/module.h> #include <linux/netdevice.h> +#include <linux/sfp.h> #include "ionic.h" #include "ionic_bus.h" @@ -677,23 +678,27 @@ static int ionic_get_module_info(struct net_device *netdev, struct ionic_lif *lif = netdev_priv(netdev); struct ionic_dev *idev = &lif->ionic->idev; struct ionic_xcvr_status *xcvr; + struct sfp_eeprom_base *sfp; xcvr = &idev->port_info->status.xcvr; + sfp = (struct sfp_eeprom_base *) xcvr->sprom; /* report the module data type and length */ - switch (xcvr->sprom[0]) { - case 0x03: /* SFP */ + switch (sfp->phys_id) { + case SFF8024_ID_SFP: modinfo->type = ETH_MODULE_SFF_8079; modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; break; - case 0x0D: /* QSFP */ - case 0x11: /* QSFP28 */ + case SFF8024_ID_QSFP_8436_8636: + case SFF8024_ID_QSFP28_8636: modinfo->type = ETH_MODULE_SFF_8436; modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; break; default: netdev_info(netdev, "unknown xcvr type 0x%02x\n", xcvr->sprom[0]); + modinfo->type = 0; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; break; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index aaf4a40fa98b..8b442eb010a2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -275,8 +275,10 @@ static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq) if (qcq->flags & IONIC_QCQ_F_INTR) { ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, IONIC_INTR_MASK_SET); + irq_set_affinity_hint(qcq->intr.vector, NULL); devm_free_irq(dev, qcq->intr.vector, &qcq->napi); netif_napi_del(&qcq->napi); + qcq->intr.vector = 0; } qcq->flags &= ~IONIC_QCQ_F_INITED; @@ -318,19 +320,21 @@ static void ionic_qcqs_free(struct ionic_lif *lif) lif->adminqcq = NULL; } - for (i = 0; i < lif->nxqs; i++) - if (lif->rxqcqs[i].stats) - devm_kfree(dev, lif->rxqcqs[i].stats); - - devm_kfree(dev, lif->rxqcqs); - lif->rxqcqs = NULL; - - for (i = 0; i < lif->nxqs; i++) - if (lif->txqcqs[i].stats) - devm_kfree(dev, lif->txqcqs[i].stats); + if (lif->rxqcqs) { + for (i = 0; i < lif->nxqs; i++) + if (lif->rxqcqs[i].stats) + devm_kfree(dev, lif->rxqcqs[i].stats); + devm_kfree(dev, lif->rxqcqs); + lif->rxqcqs = NULL; + } - devm_kfree(dev, lif->txqcqs); - lif->txqcqs = NULL; + if (lif->txqcqs) { + for (i = 0; i < lif->nxqs; i++) + if (lif->txqcqs[i].stats) + devm_kfree(dev, lif->txqcqs[i].stats); + devm_kfree(dev, lif->txqcqs); + lif->txqcqs = NULL; + } } static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq, @@ -832,7 +836,7 @@ static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN); err = ionic_adminq_post_wait(lif, &ctx); - if (err) + if (err && err != -EEXIST) return err; return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx); @@ -862,7 +866,7 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) spin_unlock_bh(&lif->rx_filters.lock); err = ionic_adminq_post_wait(lif, &ctx); - if (err) + if (err && err != -EEXIST) return err; netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n", addr, @@ -1425,10 +1429,15 @@ static void ionic_lif_rss_deinit(struct ionic_lif *lif) static void ionic_txrx_disable(struct ionic_lif *lif) { unsigned int i; + int err; for (i = 0; i < lif->nxqs; i++) { - ionic_qcq_disable(lif->txqcqs[i].qcq); - ionic_qcq_disable(lif->rxqcqs[i].qcq); + err = ionic_qcq_disable(lif->txqcqs[i].qcq); + if (err == -ETIMEDOUT) + break; + err = ionic_qcq_disable(lif->rxqcqs[i].qcq); + if (err == -ETIMEDOUT) + break; } } @@ -1552,7 +1561,8 @@ static int ionic_txrx_enable(struct ionic_lif *lif) ionic_rx_fill(&lif->rxqcqs[i].qcq->q); err = ionic_qcq_enable(lif->rxqcqs[i].qcq); if (err) { - ionic_qcq_disable(lif->txqcqs[i].qcq); + if (err != -ETIMEDOUT) + ionic_qcq_disable(lif->txqcqs[i].qcq); goto err_out; } } @@ -1561,8 +1571,12 @@ static int ionic_txrx_enable(struct ionic_lif *lif) err_out: while (i--) { - ionic_qcq_disable(lif->rxqcqs[i].qcq); - ionic_qcq_disable(lif->txqcqs[i].qcq); + err = ionic_qcq_disable(lif->rxqcqs[i].qcq); + if (err == -ETIMEDOUT) + break; + err = ionic_qcq_disable(lif->txqcqs[i].qcq); + if (err == -ETIMEDOUT) + break; } return err; @@ -1700,7 +1714,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) if (!(is_zero_ether_addr(mac) || is_valid_ether_addr(mac))) return -EINVAL; - down_read(&ionic->vf_op_lock); + down_write(&ionic->vf_op_lock); if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; @@ -1710,7 +1724,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) ether_addr_copy(ionic->vfs[vf].macaddr, mac); } - up_read(&ionic->vf_op_lock); + up_write(&ionic->vf_op_lock); return ret; } @@ -1731,7 +1745,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, if (proto != htons(ETH_P_8021Q)) return -EPROTONOSUPPORT; - down_read(&ionic->vf_op_lock); + down_write(&ionic->vf_op_lock); if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; @@ -1742,7 +1756,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, ionic->vfs[vf].vlanid = vlan; } - up_read(&ionic->vf_op_lock); + up_write(&ionic->vf_op_lock); return ret; } @@ -2066,9 +2080,11 @@ static void ionic_lif_deinit(struct ionic_lif *lif) clear_bit(IONIC_LIF_F_INITED, lif->state); ionic_rx_filters_deinit(lif); - ionic_lif_rss_deinit(lif); + if (lif->netdev->features & NETIF_F_RXHASH) + ionic_lif_rss_deinit(lif); napi_disable(&lif->adminqcq->napi); + netif_napi_del(&lif->adminqcq->napi); ionic_lif_qcq_deinit(lif, lif->notifyqcq); ionic_lif_qcq_deinit(lif, lif->adminqcq); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index e4a76e66f542..c16dbbe54bf7 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -58,6 +58,8 @@ static const char *ionic_error_to_str(enum ionic_status_code code) return "IONIC_RC_BAD_ADDR"; case IONIC_RC_DEV_CMD: return "IONIC_RC_DEV_CMD"; + case IONIC_RC_ENOSUPP: + return "IONIC_RC_ENOSUPP"; case IONIC_RC_ERROR: return "IONIC_RC_ERROR"; case IONIC_RC_ERDMA: @@ -76,6 +78,7 @@ static int ionic_error_to_errno(enum ionic_status_code code) case IONIC_RC_EQTYPE: case IONIC_RC_EQID: case IONIC_RC_EINVAL: + case IONIC_RC_ENOSUPP: return -EINVAL; case IONIC_RC_EPERM: return -EPERM; @@ -240,11 +243,16 @@ static void ionic_adminq_cb(struct ionic_queue *q, static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) { - struct ionic_queue *adminq = &lif->adminqcq->q; + struct ionic_queue *adminq; int err = 0; WARN_ON(in_interrupt()); + if (!lif->adminqcq) + return -EIO; + + adminq = &lif->adminqcq->q; + spin_lock(&lif->adminq_lock); if (!ionic_q_has_space(adminq, 1)) { err = -ENOSPC; @@ -357,7 +365,10 @@ try_again: done, duration / HZ, duration); if (!done && hb) { - ionic_dev_cmd_clean(ionic); + /* It is possible (but unlikely) that FW was busy and missed a + * heartbeat check but is still alive and will process this + * request, so don't clean the dev_cmd in this case. + */ dev_warn(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n", ionic_opcode_to_str(opcode), opcode); return -ENXIO; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c index 6a2d91d58968..66f45fce90fa 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c @@ -748,24 +748,7 @@ static int netxen_set_intr_coalesce(struct net_device *netdev, if (ethcoal->rx_coalesce_usecs > 0xffff || ethcoal->rx_max_coalesced_frames > 0xffff || ethcoal->tx_coalesce_usecs > 0xffff || - ethcoal->tx_max_coalesced_frames > 0xffff || - ethcoal->rx_coalesce_usecs_irq || - ethcoal->rx_max_coalesced_frames_irq || - ethcoal->tx_coalesce_usecs_irq || - ethcoal->tx_max_coalesced_frames_irq || - ethcoal->stats_block_coalesce_usecs || - ethcoal->use_adaptive_rx_coalesce || - ethcoal->use_adaptive_tx_coalesce || - ethcoal->pkt_rate_low || - ethcoal->rx_coalesce_usecs_low || - ethcoal->rx_max_coalesced_frames_low || - ethcoal->tx_coalesce_usecs_low || - ethcoal->tx_max_coalesced_frames_low || - ethcoal->pkt_rate_high || - ethcoal->rx_coalesce_usecs_high || - ethcoal->rx_max_coalesced_frames_high || - ethcoal->tx_coalesce_usecs_high || - ethcoal->tx_max_coalesced_frames_high) + ethcoal->tx_max_coalesced_frames > 0xffff) return -EINVAL; if (!ethcoal->rx_coalesce_usecs || @@ -923,6 +906,8 @@ netxen_get_dump_data(struct net_device *netdev, struct ethtool_dump *dump, } const struct ethtool_ops netxen_nic_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = netxen_nic_get_drvinfo, .get_regs_len = netxen_nic_get_regs_len, .get_regs = netxen_nic_get_regs, diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 8a426afb6a55..812c7766e096 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1566,7 +1566,7 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev, static int qede_selftest_receive_traffic(struct qede_dev *edev) { - u16 hw_comp_cons, sw_comp_cons, sw_rx_index, len; + u16 sw_rx_index, len; struct eth_fast_path_rx_reg_cqe *fp_cqe; struct qede_rx_queue *rxq = NULL; struct sw_rx_data *sw_rx_data; @@ -1596,17 +1596,6 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev) continue; } - hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr); - sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring); - - /* Memory barrier to prevent the CPU from doing speculative - * reads of CQE/BD before reading hw_comp_cons. If the CQE is - * read before it is written by FW, then FW writes CQE and SB, - * and then the CPU reads the hw_comp_cons, it will use an old - * CQE. - */ - rmb(); - /* Get the CQE from the completion ring */ cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring); @@ -2087,6 +2076,7 @@ err: } static const struct ethtool_ops qede_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_link_ksettings = qede_get_link_ksettings, .set_link_ksettings = qede_set_link_ksettings, .get_drvinfo = qede_get_drvinfo, @@ -2133,6 +2123,7 @@ static const struct ethtool_ops qede_ethtool_ops = { }; static const struct ethtool_ops qede_vf_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_link_ksettings = qede_get_link_ksettings, .get_drvinfo = qede_get_drvinfo, .get_msglevel = qede_get_msglevel, diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 6505f7e2d1db..fe72bb6c9455 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1757,7 +1757,7 @@ static int qede_parse_actions(struct qede_dev *edev, return -EINVAL; } - if (!flow_action_basic_hw_stats_types_check(flow_action, extack)) + if (!flow_action_basic_hw_stats_check(flow_action, extack)) return -EOPNOTSUPP; flow_action_for_each(i, act, flow_action) { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 75d83c3cbf27..5c2a3acf1e89 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1542,24 +1542,7 @@ static int qlcnic_set_intr_coalesce(struct net_device *netdev, if (ethcoal->rx_coalesce_usecs > 0xffff || ethcoal->rx_max_coalesced_frames > 0xffff || ethcoal->tx_coalesce_usecs > 0xffff || - ethcoal->tx_max_coalesced_frames > 0xffff || - ethcoal->rx_coalesce_usecs_irq || - ethcoal->rx_max_coalesced_frames_irq || - ethcoal->tx_coalesce_usecs_irq || - ethcoal->tx_max_coalesced_frames_irq || - ethcoal->stats_block_coalesce_usecs || - ethcoal->use_adaptive_rx_coalesce || - ethcoal->use_adaptive_tx_coalesce || - ethcoal->pkt_rate_low || - ethcoal->rx_coalesce_usecs_low || - ethcoal->rx_max_coalesced_frames_low || - ethcoal->tx_coalesce_usecs_low || - ethcoal->tx_max_coalesced_frames_low || - ethcoal->pkt_rate_high || - ethcoal->rx_coalesce_usecs_high || - ethcoal->rx_max_coalesced_frames_high || - ethcoal->tx_coalesce_usecs_high || - ethcoal->tx_max_coalesced_frames_high) + ethcoal->tx_max_coalesced_frames > 0xffff) return -EINVAL; err = qlcnic_config_intr_coalesce(adapter, ethcoal); @@ -1834,6 +1817,8 @@ qlcnic_set_dump(struct net_device *netdev, struct ethtool_dump *val) } const struct ethtool_ops qlcnic_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = qlcnic_get_drvinfo, .get_regs_len = qlcnic_get_regs_len, .get_regs = qlcnic_get_regs, @@ -1865,6 +1850,8 @@ const struct ethtool_ops qlcnic_ethtool_ops = { }; const struct ethtool_ops qlcnic_sriov_vf_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = qlcnic_get_drvinfo, .get_regs_len = qlcnic_get_regs_len, .get_regs = qlcnic_get_regs, diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index ce030e093485..b7dc1c112a94 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2006,6 +2006,8 @@ out: } static const struct ethtool_ops rtl8169_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = rtl8169_get_drvinfo, .get_regs_len = rtl8169_get_regs_len, .get_link = ethtool_op_get_link, @@ -2226,8 +2228,8 @@ u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp) static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag) { - if (!test_and_set_bit(flag, tp->wk.flags)) - schedule_work(&tp->wk.work); + set_bit(flag, tp->wk.flags); + schedule_work(&tp->wk.work); } static void rtl8169_init_phy(struct rtl8169_private *tp) @@ -4573,8 +4575,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) if (unlikely(status & RxFIFOOver && tp->mac_version == RTL_GIGA_MAC_VER_11)) { netif_stop_queue(tp->dev); - /* XXX - Hack alert. See rtl_task(). */ - set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags); + rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); } rtl_irq_disable(tp); @@ -4587,31 +4588,17 @@ out: static void rtl_task(struct work_struct *work) { - static const struct { - int bitnr; - void (*action)(struct rtl8169_private *); - } rtl_work[] = { - { RTL_FLAG_TASK_RESET_PENDING, rtl_reset_work }, - }; struct rtl8169_private *tp = container_of(work, struct rtl8169_private, wk.work); - struct net_device *dev = tp->dev; - int i; rtl_lock_work(tp); - if (!netif_running(dev) || + if (!netif_running(tp->dev) || !test_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags)) goto out_unlock; - for (i = 0; i < ARRAY_SIZE(rtl_work); i++) { - bool pending; - - pending = test_and_clear_bit(rtl_work[i].bitnr, tp->wk.flags); - if (pending) - rtl_work[i].action(tp); - } - + if (test_and_clear_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags)) + rtl_reset_work(tp); out_unlock: rtl_unlock_work(tp); } diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index e367e77c773b..b73f7d023e99 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -796,6 +796,11 @@ static void rtl8168g_disable_aldps(struct phy_device *phydev) phy_modify_paged(phydev, 0x0a43, 0x10, BIT(2), 0); } +static void rtl8168g_enable_gphy_10m(struct phy_device *phydev) +{ + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11)); +} + static void rtl8168g_phy_adjust_10m_aldps(struct phy_device *phydev) { phy_modify_paged(phydev, 0x0bcc, 0x14, BIT(8), 0); @@ -904,8 +909,7 @@ static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp, r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800); phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002); - /* enable GPHY 10M */ - phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11)); + rtl8168g_enable_gphy_10m(phydev); /* SAR ADC performance */ phy_modify_paged(phydev, 0x0bca, 0x17, BIT(12) | BIT(13), BIT(14)); @@ -940,8 +944,7 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp, r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800); phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002); - /* enable GPHY 10M */ - phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11)); + rtl8168g_enable_gphy_10m(phydev); ioffset = rtl8168h_2_get_adc_bias_ioffset(tp); if (ioffset != 0xffff) @@ -1063,8 +1066,7 @@ static void rtl8117_hw_phy_config(struct rtl8169_private *tp, r8168g_phy_param(phydev, 0x8011, 0x0000, 0x0800); - /* enable GPHY 10M */ - phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11)); + rtl8168g_enable_gphy_10m(phydev); r8168g_phy_param(phydev, 0x8016, 0x0000, 0x0400); @@ -1171,7 +1173,7 @@ static void rtl8125_1_hw_phy_config(struct rtl8169_private *tp, phy_write_paged(phydev, 0xbc3, 0x12, 0x5555); phy_modify_paged(phydev, 0xbf0, 0x15, 0x0e00, 0x0a00); phy_modify_paged(phydev, 0xa5c, 0x10, 0x0400, 0x0000); - phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800); + rtl8168g_enable_gphy_10m(phydev); rtl8125_config_eee_phy(phydev); } @@ -1236,7 +1238,7 @@ static void rtl8125_2_hw_phy_config(struct rtl8169_private *tp, phy_modify_paged(phydev, 0xa5d, 0x12, 0x0000, 0x0020); phy_modify_paged(phydev, 0xad4, 0x17, 0x0010, 0x0000); phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000); - phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800); + rtl8168g_enable_gphy_10m(phydev); rtl8125_config_eee_phy(phydev); } diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c index 466483c4ac67..21465cb3d60a 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c @@ -476,6 +476,7 @@ static int sxgbe_get_regs_len(struct net_device *dev) } static const struct ethtool_ops sxgbe_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = sxgbe_getdrvinfo, .get_msglevel = sxgbe_getmsglevel, .set_msglevel = sxgbe_setmsglevel, diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index c705743d69f7..2cc8184b7e6b 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -2277,7 +2277,7 @@ static int __init sxgbe_cmdline_opt(char *str) if (!str || !*str) return -EINVAL; while ((opt = strsep(&str, ",")) != NULL) { - if (!strncmp(opt, "eee_timer:", 6)) { + if (!strncmp(opt, "eee_timer:", 10)) { if (kstrtoint(opt + 10, 0, &eee_timer)) goto err; } diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 52113b7529d6..3f16bd807c6e 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2853,11 +2853,24 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) } /* Transmit timestamps are only available for 8XXX series. They result - * in three events per packet. These occur in order, and are: - * - the normal completion event + * in up to three events per packet. These occur in order, and are: + * - the normal completion event (may be omitted) * - the low part of the timestamp * - the high part of the timestamp * + * It's possible for multiple completion events to appear before the + * corresponding timestamps. So we can for example get: + * COMP N + * COMP N+1 + * TS_LO N + * TS_HI N + * TS_LO N+1 + * TS_HI N+1 + * + * In addition it's also possible for the adjacent completions to be + * merged, so we may not see COMP N above. As such, the completion + * events are not very useful here. + * * Each part of the timestamp is itself split across two 16 bit * fields in the event. */ @@ -2865,17 +2878,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) switch (tx_ev_type) { case TX_TIMESTAMP_EVENT_TX_EV_COMPLETION: - /* In case of Queue flush or FLR, we might have received - * the previous TX completion event but not the Timestamp - * events. - */ - if (tx_queue->completed_desc_ptr != tx_queue->ptr_mask) - efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr); - - tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, - ESF_DZ_TX_DESCR_INDX); - tx_queue->completed_desc_ptr = - tx_ev_desc_ptr & tx_queue->ptr_mask; + /* Ignore this event - see above. */ break; case TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO: @@ -2887,8 +2890,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) ts_part = efx_ef10_extract_event_ts(event); tx_queue->completed_timestamp_major = ts_part; - efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr); - tx_queue->completed_desc_ptr = tx_queue->ptr_mask; + efx_xmit_done_single(tx_queue); break; default: diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 14393767ef9f..4580b30caae1 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -685,10 +685,70 @@ reset_nic: return rc ? rc : rc2; } -int efx_ef10_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf_i, - bool spoofchk) +static int efx_ef10_sriov_set_privilege_mask(struct efx_nic *efx, int vf_i, + u32 mask, u32 value) { - return spoofchk ? -EOPNOTSUPP : 0; + MCDI_DECLARE_BUF(pm_outbuf, MC_CMD_PRIVILEGE_MASK_OUT_LEN); + MCDI_DECLARE_BUF(pm_inbuf, MC_CMD_PRIVILEGE_MASK_IN_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u32 old_mask, new_mask; + size_t outlen; + int rc; + + EFX_WARN_ON_PARANOID((value & ~mask) != 0); + + /* Get privilege mask */ + MCDI_POPULATE_DWORD_2(pm_inbuf, PRIVILEGE_MASK_IN_FUNCTION, + PRIVILEGE_MASK_IN_FUNCTION_PF, nic_data->pf_index, + PRIVILEGE_MASK_IN_FUNCTION_VF, vf_i); + + rc = efx_mcdi_rpc(efx, MC_CMD_PRIVILEGE_MASK, + pm_inbuf, sizeof(pm_inbuf), + pm_outbuf, sizeof(pm_outbuf), &outlen); + + if (rc != 0) + return rc; + if (outlen != MC_CMD_PRIVILEGE_MASK_OUT_LEN) + return -EIO; + + old_mask = MCDI_DWORD(pm_outbuf, PRIVILEGE_MASK_OUT_OLD_MASK); + + new_mask = old_mask & ~mask; + new_mask |= value; + + if (new_mask == old_mask) + return 0; + + new_mask |= MC_CMD_PRIVILEGE_MASK_IN_DO_CHANGE; + + /* Set privilege mask */ + MCDI_SET_DWORD(pm_inbuf, PRIVILEGE_MASK_IN_NEW_MASK, new_mask); + + rc = efx_mcdi_rpc(efx, MC_CMD_PRIVILEGE_MASK, + pm_inbuf, sizeof(pm_inbuf), + pm_outbuf, sizeof(pm_outbuf), &outlen); + + if (rc != 0) + return rc; + if (outlen != MC_CMD_PRIVILEGE_MASK_OUT_LEN) + return -EIO; + + return 0; +} + +int efx_ef10_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf_i, bool spoofchk) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + /* Can't enable spoofchk if firmware doesn't support it. */ + if (!(nic_data->datapath_caps & + BIT(MC_CMD_GET_CAPABILITIES_OUT_TX_MAC_SECURITY_FILTERING_LBN)) && + spoofchk) + return -EOPNOTSUPP; + + return efx_ef10_sriov_set_privilege_mask(efx, vf_i, + MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING_TX, + spoofchk ? 0 : MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING_TX); } int efx_ef10_sriov_set_vf_link_state(struct efx_nic *efx, int vf_i, diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index da54afaa3c44..66dcab140449 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -20,6 +20,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev); netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); +void efx_xmit_done_single(struct efx_tx_queue *tx_queue); int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, void *type_data); extern unsigned int efx_piobuf_size; diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index d2d738314c50..c492523b986c 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -600,6 +600,7 @@ struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel) if (tx_queue->channel) tx_queue->channel = channel; tx_queue->buffer = NULL; + tx_queue->cb_page = NULL; memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); } diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index b0d76bc19673..1799ff9a45d9 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -200,11 +200,11 @@ void efx_link_status_changed(struct efx_nic *efx) unsigned int efx_xdp_max_mtu(struct efx_nic *efx) { /* The maximum MTU that we can fit in a single page, allowing for - * framing, overhead and XDP headroom. + * framing, overhead and XDP headroom + tailroom. */ int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + efx->rx_prefix_size + efx->type->rx_buffer_padding + - efx->rx_ip_align + XDP_PACKET_HEADROOM; + efx->rx_ip_align + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM; return PAGE_SIZE - overhead; } @@ -302,8 +302,9 @@ static void efx_start_datapath(struct efx_nic *efx) efx->rx_dma_len = (efx->rx_prefix_size + EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + efx->type->rx_buffer_padding); - rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM + - efx->rx_ip_align + efx->rx_dma_len); + rx_buf_len = (sizeof(struct efx_rx_page_state) + EFX_XDP_HEADROOM + + efx->rx_ip_align + efx->rx_dma_len + EFX_XDP_TAILROOM); + if (rx_buf_len <= PAGE_SIZE) { efx->rx_scatter = efx->type->always_rx_scatter; efx->rx_buffer_order = 0; diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 993b5769525b..04e88d05e8ff 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -232,9 +232,6 @@ static int efx_ethtool_set_coalesce(struct net_device *net_dev, bool adaptive, rx_may_override_tx; int rc; - if (coalesce->use_adaptive_tx_coalesce) - return -EINVAL; - efx_get_irq_moderation(efx, &tx_usecs, &rx_usecs, &adaptive); if (coalesce->rx_coalesce_usecs != rx_usecs) @@ -582,6 +579,7 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev, case ETHTOOL_GRXFH: { struct efx_rss_context *ctx = &efx->rss_context; + __u64 data; mutex_lock(&efx->rss_lock); if (info->flow_type & FLOW_RSS && info->rss_context) { @@ -591,35 +589,38 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev, goto out_unlock; } } - info->data = 0; + + data = 0; if (!efx_rss_active(ctx)) /* No RSS */ - goto out_unlock; + goto out_setdata_unlock; + switch (info->flow_type & ~FLOW_RSS) { case UDP_V4_FLOW: - if (ctx->rx_hash_udp_4tuple) - /* fall through */ - case TCP_V4_FLOW: - info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - /* fall through */ - case SCTP_V4_FLOW: - case AH_ESP_V4_FLOW: - case IPV4_FLOW: - info->data |= RXH_IP_SRC | RXH_IP_DST; - break; case UDP_V6_FLOW: if (ctx->rx_hash_udp_4tuple) - /* fall through */ + data = (RXH_L4_B_0_1 | RXH_L4_B_2_3 | + RXH_IP_SRC | RXH_IP_DST); + else + data = RXH_IP_SRC | RXH_IP_DST; + break; + case TCP_V4_FLOW: case TCP_V6_FLOW: - info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - /* fall through */ + data = (RXH_L4_B_0_1 | RXH_L4_B_2_3 | + RXH_IP_SRC | RXH_IP_DST); + break; + case SCTP_V4_FLOW: case SCTP_V6_FLOW: + case AH_ESP_V4_FLOW: case AH_ESP_V6_FLOW: + case IPV4_FLOW: case IPV6_FLOW: - info->data |= RXH_IP_SRC | RXH_IP_DST; + data = RXH_IP_SRC | RXH_IP_DST; break; default: break; } +out_setdata_unlock: + info->data = data; out_unlock: mutex_unlock(&efx->rss_lock); return rc; @@ -1134,6 +1135,9 @@ static int efx_ethtool_set_fecparam(struct net_device *net_dev, } const struct ethtool_ops efx_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_USECS_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = efx_ethtool_get_drvinfo, .get_regs_len = efx_ethtool_get_regs_len, .get_regs = efx_ethtool_get_regs, diff --git a/drivers/net/ethernet/sfc/falcon/ethtool.c b/drivers/net/ethernet/sfc/falcon/ethtool.c index 08bd6a321918..db90d94e24c9 100644 --- a/drivers/net/ethernet/sfc/falcon/ethtool.c +++ b/drivers/net/ethernet/sfc/falcon/ethtool.c @@ -603,9 +603,6 @@ static int ef4_ethtool_set_coalesce(struct net_device *net_dev, bool adaptive, rx_may_override_tx; int rc; - if (coalesce->use_adaptive_tx_coalesce) - return -EINVAL; - ef4_get_irq_moderation(efx, &tx_usecs, &rx_usecs, &adaptive); if (coalesce->rx_coalesce_usecs != rx_usecs) @@ -1311,6 +1308,9 @@ static int ef4_ethtool_get_module_info(struct net_device *net_dev, } const struct ethtool_ops ef4_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_USECS_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = ef4_ethtool_get_drvinfo, .get_regs_len = ef4_ethtool_get_regs_len, .get_regs = ef4_ethtool_get_regs, diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 392bd5b7017e..b084e623b5f4 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -91,6 +91,12 @@ #define EFX_RX_BUF_ALIGNMENT 4 #endif +/* Non-standard XDP_PACKET_HEADROOM and tailroom to satisfy XDP_REDIRECT and + * still fit two standard MTU size packets into a single 4K page. + */ +#define EFX_XDP_HEADROOM 128 +#define EFX_XDP_TAILROOM SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + /* Forward declare Precision Time Protocol (PTP) support structure. */ struct efx_ptp_data; struct hwtstamp_config; @@ -208,8 +214,6 @@ struct efx_tx_buffer { * avoid cache-line ping-pong between the xmit path and the * completion path. * @merge_events: Number of TX merged completion events - * @completed_desc_ptr: Most recent completed pointer - only used with - * timestamping. * @completed_timestamp_major: Top part of the most recent tx timestamp. * @completed_timestamp_minor: Low part of the most recent tx timestamp. * @insert_count: Current insert pointer @@ -269,7 +273,6 @@ struct efx_tx_queue { unsigned int merge_events; unsigned int bytes_compl; unsigned int pkts_compl; - unsigned int completed_desc_ptr; u32 completed_timestamp_major; u32 completed_timestamp_minor; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index a2042f16babc..260352d97d9d 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -302,7 +302,7 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, efx->rx_prefix_size); xdp.data = *ehp; - xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; + xdp.data_hard_start = xdp.data - EFX_XDP_HEADROOM; /* No support yet for XDP metadata */ xdp_set_data_meta_invalid(&xdp); diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index ee8beb87bdc1..e10c23833515 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -412,10 +412,10 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) index = rx_queue->added_count & rx_queue->ptr_mask; rx_buf = efx_rx_buffer(rx_queue, index); rx_buf->dma_addr = dma_addr + efx->rx_ip_align + - XDP_PACKET_HEADROOM; + EFX_XDP_HEADROOM; rx_buf->page = page; rx_buf->page_offset = page_offset + efx->rx_ip_align + - XDP_PACKET_HEADROOM; + EFX_XDP_HEADROOM; rx_buf->len = efx->rx_dma_len; rx_buf->flags = 0; ++rx_queue->added_count; @@ -433,7 +433,7 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) void efx_rx_config_page_split(struct efx_nic *efx) { efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align + - XDP_PACKET_HEADROOM, + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM, EFX_RX_BUF_ALIGNMENT); efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 696a77c20cb7..19b58563cb78 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -534,6 +534,44 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, return efx_enqueue_skb(tx_queue, skb); } +void efx_xmit_done_single(struct efx_tx_queue *tx_queue) +{ + unsigned int pkts_compl = 0, bytes_compl = 0; + unsigned int read_ptr; + bool finished = false; + + read_ptr = tx_queue->read_count & tx_queue->ptr_mask; + + while (!finished) { + struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; + + if (!efx_tx_buffer_in_use(buffer)) { + struct efx_nic *efx = tx_queue->efx; + + netif_err(efx, hw, efx->net_dev, + "TX queue %d spurious single TX completion\n", + tx_queue->queue); + efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); + return; + } + + /* Need to check the flag before dequeueing. */ + if (buffer->flags & EFX_TX_BUF_SKB) + finished = true; + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); + + ++tx_queue->read_count; + read_ptr = tx_queue->read_count & tx_queue->ptr_mask; + } + + tx_queue->pkts_compl += pkts_compl; + tx_queue->bytes_compl += bytes_compl; + + EFX_WARN_ON_PARANOID(pkts_compl != 1); + + efx_xmit_done_check_empty(tx_queue); +} + void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue) { struct efx_nic *efx = tx_queue->efx; diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index b1571e9789d0..70876df1da69 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -80,7 +80,6 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->xmit_more_available = false; tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) && tx_queue->channel == efx_ptp_channel(efx)); - tx_queue->completed_desc_ptr = tx_queue->ptr_mask; tx_queue->completed_timestamp_major = 0; tx_queue->completed_timestamp_minor = 0; @@ -210,10 +209,9 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, while (read_ptr != stop_index) { struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; - if (!(buffer->flags & EFX_TX_BUF_OPTION) && - unlikely(buffer->len == 0)) { + if (!efx_tx_buffer_in_use(buffer)) { netif_err(efx, tx_err, efx->net_dev, - "TX queue %d spurious TX completion id %x\n", + "TX queue %d spurious TX completion id %d\n", tx_queue->queue, read_ptr); efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); return; @@ -226,6 +224,19 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, } } +void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue) +{ + if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { + tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); + if (tx_queue->read_count == tx_queue->old_write_count) { + /* Ensure that read_count is flushed. */ + smp_mb(); + tx_queue->empty_read_count = + tx_queue->read_count | EFX_EMPTY_COUNT_VALID; + } + } +} + void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) { unsigned int fill_level, pkts_compl = 0, bytes_compl = 0; @@ -256,15 +267,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) netif_tx_wake_queue(tx_queue->core_txq); } - /* Check whether the hardware queue is now empty */ - if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { - tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); - if (tx_queue->read_count == tx_queue->old_write_count) { - smp_mb(); - tx_queue->empty_read_count = - tx_queue->read_count | EFX_EMPTY_COUNT_VALID; - } - } + efx_xmit_done_check_empty(tx_queue); } /* Remove buffers put into a tx_queue for the current packet. diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h index f92f1fe3a87f..99cf7ce2f36c 100644 --- a/drivers/net/ethernet/sfc/tx_common.h +++ b/drivers/net/ethernet/sfc/tx_common.h @@ -21,6 +21,12 @@ void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, unsigned int *pkts_compl, unsigned int *bytes_compl); +static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer) +{ + return buffer->len || (buffer->flags & EFX_TX_BUF_OPTION); +} + +void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue); void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 58b9b7ce7195..a5a0fb60193a 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -589,6 +589,8 @@ static void netsec_et_set_msglevel(struct net_device *dev, u32 datum) } static const struct ethtool_ops netsec_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = netsec_et_get_drvinfo, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 9bdbf589d93f..386663208c23 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -34,6 +34,11 @@ #define DWMAC_CORE_5_00 0x50 #define DWMAC_CORE_5_10 0x51 #define DWXGMAC_CORE_2_10 0x21 +#define DWXLGMAC_CORE_2_00 0x20 + +/* Device ID */ +#define DWXGMAC_ID 0x76 +#define DWXLGMAC_ID 0x27 #define STMMAC_CHAN0 0 /* Always supported and default for all chips */ @@ -426,6 +431,12 @@ struct mac_link { u32 speed5000; u32 speed10000; } xgmii; + struct { + u32 speed25000; + u32 speed40000; + u32 speed50000; + u32 speed100000; + } xlgmii; }; struct mii_regs { @@ -459,6 +470,7 @@ struct mac_device_info { unsigned int pcs; unsigned int pmt; unsigned int ps; + unsigned int xlgmac; }; struct stmmac_rx_routing { @@ -470,6 +482,7 @@ int dwmac100_setup(struct stmmac_priv *priv); int dwmac1000_setup(struct stmmac_priv *priv); int dwmac4_setup(struct stmmac_priv *priv); int dwxgmac2_setup(struct stmmac_priv *priv); +int dwxlgmac2_setup(struct stmmac_priv *priv); void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], unsigned int high, unsigned int low); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index d0356fbd1e43..542784300620 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -24,6 +24,7 @@ static void dwmac1000_core_init(struct mac_device_info *hw, struct net_device *dev) { + struct stmmac_priv *priv = netdev_priv(dev); void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + GMAC_CONTROL); int mtu = dev->mtu; @@ -35,7 +36,7 @@ static void dwmac1000_core_init(struct mac_device_info *hw, * Broadcom tags can look like invalid LLC/SNAP packets and cause the * hardware to truncate packets on reception. */ - if (netdev_uses_dsa(dev)) + if (netdev_uses_dsa(dev) || !priv->plat->enh_desc) value &= ~GMAC_CONTROL_ACS; if (mtu > 1500) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 9becca280074..6e30d7eb4983 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -6,6 +6,7 @@ */ #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/delay.h> #include "common.h" #include "dwmac4_dma.h" @@ -14,22 +15,14 @@ int dwmac4_dma_reset(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_BUS_MODE); - int limit; /* DMA SW reset */ value |= DMA_BUS_MODE_SFT_RESET; writel(value, ioaddr + DMA_BUS_MODE); - limit = 10; - while (limit--) { - if (!(readl(ioaddr + DMA_BUS_MODE) & DMA_BUS_MODE_SFT_RESET)) - break; - mdelay(10); - } - - if (limit < 0) - return -EBUSY; - return 0; + return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, + !(value & DMA_BUS_MODE_SFT_RESET), + 10000, 100000); } void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 688d36095333..cb87d31a99df 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -16,19 +16,14 @@ int dwmac_dma_reset(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_BUS_MODE); - int err; /* DMA SW reset */ value |= DMA_BUS_MODE_SFT_RESET; writel(value, ioaddr + DMA_BUS_MODE); - err = readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, + return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, !(value & DMA_BUS_MODE_SFT_RESET), 10000, 100000); - if (err) - return -EBUSY; - - return 0; } /* CSR1 enables the transmit DMA to check for new descriptor */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 67b754a56288..0e4575f7bedb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -9,6 +9,7 @@ #include <linux/iopoll.h> #include "stmmac.h" #include "stmmac_ptp.h" +#include "dwxlgmac2.h" #include "dwxgmac2.h" static void dwxgmac2_core_init(struct mac_device_info *hw, @@ -1485,6 +1486,67 @@ const struct stmmac_ops dwxgmac210_ops = { .fpe_configure = dwxgmac3_fpe_configure, }; +static void dwxlgmac2_rx_queue_enable(struct mac_device_info *hw, u8 mode, + u32 queue) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + + value = readl(ioaddr + XLGMAC_RXQ_ENABLE_CTRL0) & ~XGMAC_RXQEN(queue); + if (mode == MTL_QUEUE_AVB) + value |= 0x1 << XGMAC_RXQEN_SHIFT(queue); + else if (mode == MTL_QUEUE_DCB) + value |= 0x2 << XGMAC_RXQEN_SHIFT(queue); + writel(value, ioaddr + XLGMAC_RXQ_ENABLE_CTRL0); +} + +const struct stmmac_ops dwxlgmac2_ops = { + .core_init = dwxgmac2_core_init, + .set_mac = dwxgmac2_set_mac, + .rx_ipc = dwxgmac2_rx_ipc, + .rx_queue_enable = dwxlgmac2_rx_queue_enable, + .rx_queue_prio = dwxgmac2_rx_queue_prio, + .tx_queue_prio = dwxgmac2_tx_queue_prio, + .rx_queue_routing = NULL, + .prog_mtl_rx_algorithms = dwxgmac2_prog_mtl_rx_algorithms, + .prog_mtl_tx_algorithms = dwxgmac2_prog_mtl_tx_algorithms, + .set_mtl_tx_queue_weight = dwxgmac2_set_mtl_tx_queue_weight, + .map_mtl_to_dma = dwxgmac2_map_mtl_to_dma, + .config_cbs = dwxgmac2_config_cbs, + .dump_regs = dwxgmac2_dump_regs, + .host_irq_status = dwxgmac2_host_irq_status, + .host_mtl_irq_status = dwxgmac2_host_mtl_irq_status, + .flow_ctrl = dwxgmac2_flow_ctrl, + .pmt = dwxgmac2_pmt, + .set_umac_addr = dwxgmac2_set_umac_addr, + .get_umac_addr = dwxgmac2_get_umac_addr, + .set_eee_mode = dwxgmac2_set_eee_mode, + .reset_eee_mode = dwxgmac2_reset_eee_mode, + .set_eee_timer = dwxgmac2_set_eee_timer, + .set_eee_pls = dwxgmac2_set_eee_pls, + .pcs_ctrl_ane = NULL, + .pcs_rane = NULL, + .pcs_get_adv_lp = NULL, + .debug = NULL, + .set_filter = dwxgmac2_set_filter, + .safety_feat_config = dwxgmac3_safety_feat_config, + .safety_feat_irq_status = dwxgmac3_safety_feat_irq_status, + .safety_feat_dump = dwxgmac3_safety_feat_dump, + .set_mac_loopback = dwxgmac2_set_mac_loopback, + .rss_configure = dwxgmac2_rss_configure, + .update_vlan_hash = dwxgmac2_update_vlan_hash, + .rxp_config = dwxgmac3_rxp_config, + .get_mac_tx_timestamp = dwxgmac2_get_mac_tx_timestamp, + .flex_pps_config = dwxgmac2_flex_pps_config, + .sarc_configure = dwxgmac2_sarc_configure, + .enable_vlan = dwxgmac2_enable_vlan, + .config_l3_filter = dwxgmac2_config_l3_filter, + .config_l4_filter = dwxgmac2_config_l4_filter, + .set_arp_offload = dwxgmac2_set_arp_offload, + .est_configure = dwxgmac3_est_configure, + .fpe_configure = dwxgmac3_fpe_configure, +}; + int dwxgmac2_setup(struct stmmac_priv *priv) { struct mac_device_info *mac = priv->hw; @@ -1521,3 +1583,40 @@ int dwxgmac2_setup(struct stmmac_priv *priv) return 0; } + +int dwxlgmac2_setup(struct stmmac_priv *priv) +{ + struct mac_device_info *mac = priv->hw; + + dev_info(priv->device, "\tXLGMAC\n"); + + priv->dev->priv_flags |= IFF_UNICAST_FLT; + mac->pcsr = priv->ioaddr; + mac->multicast_filter_bins = priv->plat->multicast_filter_bins; + mac->unicast_filter_entries = priv->plat->unicast_filter_entries; + mac->mcast_bits_log2 = 0; + + if (mac->multicast_filter_bins) + mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + + mac->link.duplex = 0; + mac->link.speed1000 = XLGMAC_CONFIG_SS_1000; + mac->link.speed2500 = XLGMAC_CONFIG_SS_2500; + mac->link.xgmii.speed10000 = XLGMAC_CONFIG_SS_10G; + mac->link.xlgmii.speed25000 = XLGMAC_CONFIG_SS_25G; + mac->link.xlgmii.speed40000 = XLGMAC_CONFIG_SS_40G; + mac->link.xlgmii.speed50000 = XLGMAC_CONFIG_SS_50G; + mac->link.xlgmii.speed100000 = XLGMAC_CONFIG_SS_100G; + mac->link.speed_mask = XLGMAC_CONFIG_SS; + + mac->mii.addr = XGMAC_MDIO_ADDR; + mac->mii.data = XGMAC_MDIO_DATA; + mac->mii.addr_shift = 16; + mac->mii.addr_mask = GENMASK(20, 16); + mac->mii.reg_shift = 0; + mac->mii.reg_mask = GENMASK(15, 0); + mac->mii.clk_csr_shift = 19; + mac->mii.clk_csr_mask = GENMASK(21, 19); + + return 0; +} diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxlgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxlgmac2.h new file mode 100644 index 000000000000..726090d49221 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwxlgmac2.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare XLGMAC definitions. + */ + +#ifndef __STMMAC_DWXLGMAC2_H__ +#define __STMMAC_DWXLGMAC2_H__ + +/* MAC Registers */ +#define XLGMAC_CONFIG_SS GENMASK(30, 28) +#define XLGMAC_CONFIG_SS_SHIFT 28 +#define XLGMAC_CONFIG_SS_40G (0x0 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_25G (0x1 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_50G (0x2 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_100G (0x3 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_10G (0x4 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_2500 (0x6 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_1000 (0x7 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_RXQ_ENABLE_CTRL0 0x00000140 + +#endif /* __STMMAC_DWXLGMAC2_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 3af2e5015245..bb7114f970f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -23,6 +23,18 @@ static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg) return reg & GENMASK(7, 0); } +static u32 stmmac_get_dev_id(struct stmmac_priv *priv, u32 id_reg) +{ + u32 reg = readl(priv->ioaddr + id_reg); + + if (!reg) { + dev_info(priv->device, "Version ID not available\n"); + return 0x0; + } + + return (reg & GENMASK(15, 8)) >> 8; +} + static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv) { struct mac_device_info *mac = priv->hw; @@ -69,11 +81,18 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv) return 0; } +static int stmmac_dwxlgmac_quirks(struct stmmac_priv *priv) +{ + priv->hw->xlgmac = true; + return 0; +} + static const struct stmmac_hwif_entry { bool gmac; bool gmac4; bool xgmac; u32 min_id; + u32 dev_id; const struct stmmac_regs_off regs; const void *desc; const void *dma; @@ -199,6 +218,7 @@ static const struct stmmac_hwif_entry { .gmac4 = false, .xgmac = true, .min_id = DWXGMAC_CORE_2_10, + .dev_id = DWXGMAC_ID, .regs = { .ptp_off = PTP_XGMAC_OFFSET, .mmc_off = MMC_XGMAC_OFFSET, @@ -212,6 +232,25 @@ static const struct stmmac_hwif_entry { .mmc = &dwxgmac_mmc_ops, .setup = dwxgmac2_setup, .quirks = NULL, + }, { + .gmac = false, + .gmac4 = false, + .xgmac = true, + .min_id = DWXLGMAC_CORE_2_00, + .dev_id = DWXLGMAC_ID, + .regs = { + .ptp_off = PTP_XGMAC_OFFSET, + .mmc_off = MMC_XGMAC_OFFSET, + }, + .desc = &dwxgmac210_desc_ops, + .dma = &dwxgmac210_dma_ops, + .mac = &dwxlgmac2_ops, + .hwtimestamp = &stmmac_ptp, + .mode = NULL, + .tc = &dwmac510_tc_ops, + .mmc = &dwxgmac_mmc_ops, + .setup = dwxlgmac2_setup, + .quirks = stmmac_dwxlgmac_quirks, }, }; @@ -223,13 +262,15 @@ int stmmac_hwif_init(struct stmmac_priv *priv) const struct stmmac_hwif_entry *entry; struct mac_device_info *mac; bool needs_setup = true; + u32 id, dev_id = 0; int i, ret; - u32 id; if (needs_gmac) { id = stmmac_get_id(priv, GMAC_VERSION); } else if (needs_gmac4 || needs_xgmac) { id = stmmac_get_id(priv, GMAC4_VERSION); + if (needs_xgmac) + dev_id = stmmac_get_dev_id(priv, GMAC4_VERSION); } else { id = 0; } @@ -267,6 +308,8 @@ int stmmac_hwif_init(struct stmmac_priv *priv) /* Use synopsys_id var because some setups can override this */ if (priv->synopsys_id < entry->min_id) continue; + if (needs_xgmac && (dev_id ^ entry->dev_id)) + continue; /* Only use generic HW helpers if needed */ mac->desc = mac->desc ? : entry->desc; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index c71dd99c8abf..fc350149ba34 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -605,6 +605,7 @@ extern const struct stmmac_dma_ops dwmac410_dma_ops; extern const struct stmmac_ops dwmac510_ops; extern const struct stmmac_tc_ops dwmac510_tc_ops; extern const struct stmmac_ops dwxgmac210_ops; +extern const struct stmmac_ops dwxlgmac2_ops; extern const struct stmmac_dma_ops dwxgmac210_dma_ops; extern const struct stmmac_desc_ops dwxgmac210_desc_ops; extern const struct stmmac_mmc_ops dwmac_mmc_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 020159622559..fcf080243a0f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -10,6 +10,7 @@ *******************************************************************************/ #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/delay.h> #include "common.h" #include "stmmac_ptp.h" @@ -53,7 +54,6 @@ static void config_sub_second_increment(void __iomem *ioaddr, static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) { - int limit; u32 value; writel(sec, ioaddr + PTP_STSUR); @@ -64,16 +64,9 @@ static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) writel(value, ioaddr + PTP_TCR); /* wait for present system time initialize to complete */ - limit = 10; - while (limit--) { - if (!(readl(ioaddr + PTP_TCR) & PTP_TCR_TSINIT)) - break; - mdelay(10); - } - if (limit < 0) - return -EBUSY; - - return 0; + return readl_poll_timeout(ioaddr + PTP_TCR, value, + !(value & PTP_TCR_TSINIT), + 10000, 100000); } static int config_addend(void __iomem *ioaddr, u32 addend) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f26699d9a050..0e8c80f23557 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -849,6 +849,38 @@ static void stmmac_validate(struct phylink_config *config, phylink_set(mac_supported, 10000baseKX4_Full); phylink_set(mac_supported, 10000baseKR_Full); } + if (!max_speed || (max_speed >= 25000)) { + phylink_set(mac_supported, 25000baseCR_Full); + phylink_set(mac_supported, 25000baseKR_Full); + phylink_set(mac_supported, 25000baseSR_Full); + } + if (!max_speed || (max_speed >= 40000)) { + phylink_set(mac_supported, 40000baseKR4_Full); + phylink_set(mac_supported, 40000baseCR4_Full); + phylink_set(mac_supported, 40000baseSR4_Full); + phylink_set(mac_supported, 40000baseLR4_Full); + } + if (!max_speed || (max_speed >= 50000)) { + phylink_set(mac_supported, 50000baseCR2_Full); + phylink_set(mac_supported, 50000baseKR2_Full); + phylink_set(mac_supported, 50000baseSR2_Full); + phylink_set(mac_supported, 50000baseKR_Full); + phylink_set(mac_supported, 50000baseSR_Full); + phylink_set(mac_supported, 50000baseCR_Full); + phylink_set(mac_supported, 50000baseLR_ER_FR_Full); + phylink_set(mac_supported, 50000baseDR_Full); + } + if (!max_speed || (max_speed >= 100000)) { + phylink_set(mac_supported, 100000baseKR4_Full); + phylink_set(mac_supported, 100000baseSR4_Full); + phylink_set(mac_supported, 100000baseCR4_Full); + phylink_set(mac_supported, 100000baseLR4_ER4_Full); + phylink_set(mac_supported, 100000baseKR2_Full); + phylink_set(mac_supported, 100000baseSR2_Full); + phylink_set(mac_supported, 100000baseCR2_Full); + phylink_set(mac_supported, 100000baseLR2_ER2_FR2_Full); + phylink_set(mac_supported, 100000baseDR2_Full); + } } /* Half-Duplex can only work with single queue */ @@ -929,6 +961,32 @@ static void stmmac_mac_link_up(struct phylink_config *config, default: return; } + } else if (interface == PHY_INTERFACE_MODE_XLGMII) { + switch (speed) { + case SPEED_100000: + ctrl |= priv->hw->link.xlgmii.speed100000; + break; + case SPEED_50000: + ctrl |= priv->hw->link.xlgmii.speed50000; + break; + case SPEED_40000: + ctrl |= priv->hw->link.xlgmii.speed40000; + break; + case SPEED_25000: + ctrl |= priv->hw->link.xlgmii.speed25000; + break; + case SPEED_10000: + ctrl |= priv->hw->link.xgmii.speed10000; + break; + case SPEED_2500: + ctrl |= priv->hw->link.speed2500; + break; + case SPEED_1000: + ctrl |= priv->hw->link.speed1000; + break; + default: + return; + } } else { switch (speed) { case SPEED_2500: diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 165958c9f069..ac80373b57e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -645,8 +645,6 @@ EXPORT_SYMBOL_GPL(stmmac_remove_config_dt); int stmmac_get_platform_resources(struct platform_device *pdev, struct stmmac_resources *stmmac_res) { - struct resource *res; - memset(stmmac_res, 0, sizeof(*stmmac_res)); /* Get IRQ information early to have an ability to ask for deferred @@ -674,8 +672,7 @@ int stmmac_get_platform_resources(struct platform_device *pdev, if (stmmac_res->lpi_irq == -EPROBE_DEFER) return -EPROBE_DEFER; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - stmmac_res->addr = devm_ioremap_resource(&pdev->dev, res); + stmmac_res->addr = devm_platform_ioremap_resource(pdev, 0); return PTR_ERR_OR_ZERO(stmmac_res->addr); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index 07dbe4f5456e..e6696495f126 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -1387,7 +1387,7 @@ static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src, cls->rule = rule; rule->action.entries[0].id = FLOW_ACTION_DROP; - rule->action.entries[0].hw_stats_type = FLOW_ACTION_HW_STATS_TYPE_ANY; + rule->action.entries[0].hw_stats = FLOW_ACTION_HW_STATS_ANY; rule->action.num_entries = 1; attr.dst = priv->dev->dev_addr; @@ -1516,7 +1516,7 @@ static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src, cls->rule = rule; rule->action.entries[0].id = FLOW_ACTION_DROP; - rule->action.entries[0].hw_stats_type = FLOW_ACTION_HW_STATS_TYPE_ANY; + rule->action.entries[0].hw_stats = FLOW_ACTION_HW_STATS_ANY; rule->action.num_entries = 1; attr.dst = priv->dev->dev_addr; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index a0e6118444b0..3d747846f482 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -376,7 +376,7 @@ static int tc_parse_flow_actions(struct stmmac_priv *priv, if (!flow_action_has_entries(action)) return -EINVAL; - if (!flow_action_basic_hw_stats_types_check(action, extack)) + if (!flow_action_basic_hw_stats_check(action, extack)) return -EOPNOTSUPP; flow_action_for_each(i, act, action) { diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c index fde722136869..bc198eadfcab 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c @@ -151,7 +151,6 @@ static int xlgmac_ethtool_get_coalesce(struct net_device *netdev, { struct xlgmac_pdata *pdata = netdev_priv(netdev); - memset(ec, 0, sizeof(struct ethtool_coalesce)); ec->rx_coalesce_usecs = pdata->rx_usecs; ec->rx_max_coalesced_frames = pdata->rx_frames; ec->tx_max_coalesced_frames = pdata->tx_frames; @@ -167,20 +166,6 @@ static int xlgmac_ethtool_set_coalesce(struct net_device *netdev, unsigned int rx_frames, rx_riwt, rx_usecs; unsigned int tx_frames; - /* Check for not supported parameters */ - if ((ec->rx_coalesce_usecs_irq) || (ec->rx_max_coalesced_frames_irq) || - (ec->tx_coalesce_usecs) || (ec->tx_coalesce_usecs_high) || - (ec->tx_max_coalesced_frames_irq) || (ec->tx_coalesce_usecs_irq) || - (ec->stats_block_coalesce_usecs) || (ec->pkt_rate_low) || - (ec->use_adaptive_rx_coalesce) || (ec->use_adaptive_tx_coalesce) || - (ec->rx_max_coalesced_frames_low) || (ec->rx_coalesce_usecs_low) || - (ec->tx_coalesce_usecs_low) || (ec->tx_max_coalesced_frames_low) || - (ec->pkt_rate_high) || (ec->rx_coalesce_usecs_high) || - (ec->rx_max_coalesced_frames_high) || - (ec->tx_max_coalesced_frames_high) || - (ec->rate_sample_interval)) - return -EOPNOTSUPP; - rx_usecs = ec->rx_coalesce_usecs; rx_riwt = hw_ops->usec_to_riwt(pdata, rx_usecs); rx_frames = ec->rx_max_coalesced_frames; @@ -257,6 +242,8 @@ static void xlgmac_ethtool_get_ethtool_stats(struct net_device *netdev, } static const struct ethtool_ops xlgmac_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = xlgmac_ethtool_get_drvinfo, .get_link = ethtool_op_get_link, .get_msglevel = xlgmac_ethtool_get_msglevel, diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 0f8a924fc60c..40a2ce0ca808 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -2373,6 +2373,8 @@ static void bdx_get_ethtool_stats(struct net_device *netdev, static void bdx_set_ethtool_ops(struct net_device *netdev) { static const struct ethtool_ops bdx_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = bdx_get_drvinfo, .get_link = ethtool_op_get_link, .get_coalesce = bdx_get_coalesce, diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 6ae4a72e6f43..c2c5bf87da01 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1211,6 +1211,7 @@ static int cpsw_set_channels(struct net_device *ndev, } static const struct ethtool_ops cpsw_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = cpsw_get_drvinfo, .get_msglevel = cpsw_get_msglevel, .set_msglevel = cpsw_set_msglevel, diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 71215db7934b..9209e613257d 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1175,6 +1175,7 @@ static int cpsw_set_channels(struct net_device *ndev, } static const struct ethtool_ops cpsw_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = cpsw_get_drvinfo, .get_msglevel = cpsw_get_msglevel, .set_msglevel = cpsw_set_msglevel, diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 75d4e16c692b..de282531f68b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -481,6 +481,7 @@ static int emac_set_coalesce(struct net_device *ndev, * Ethtool support for EMAC adapter */ static const struct ethtool_ops ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = emac_get_drvinfo, .get_link = ethtool_op_get_link, .get_coalesce = emac_get_coalesce, diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 4b556b74541a..713dbc04b25b 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -3648,6 +3648,8 @@ static void velocity_get_ethtool_stats(struct net_device *dev, } static const struct ethtool_ops velocity_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = velocity_get_drvinfo, .get_wol = velocity_ethtool_get_wol, .set_wol = velocity_ethtool_set_wol, diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index dc022cd5bc42..3e313e71ae36 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1314,25 +1314,6 @@ static int ll_temac_ethtools_set_coalesce(struct net_device *ndev, return -EFAULT; } - if (ec->rx_coalesce_usecs_irq || - ec->rx_max_coalesced_frames_irq || - ec->tx_coalesce_usecs_irq || - ec->tx_max_coalesced_frames_irq || - ec->stats_block_coalesce_usecs || - ec->use_adaptive_rx_coalesce || - ec->use_adaptive_tx_coalesce || - ec->pkt_rate_low || - ec->rx_coalesce_usecs_low || - ec->rx_max_coalesced_frames_low || - ec->tx_coalesce_usecs_low || - ec->tx_max_coalesced_frames_low || - ec->pkt_rate_high || - ec->rx_coalesce_usecs_high || - ec->rx_max_coalesced_frames_high || - ec->tx_coalesce_usecs_high || - ec->tx_max_coalesced_frames_high || - ec->rate_sample_interval) - return -EOPNOTSUPP; if (ec->rx_max_coalesced_frames) lp->coalesce_count_rx = ec->rx_max_coalesced_frames; if (ec->tx_max_coalesced_frames) @@ -1351,6 +1332,8 @@ static int ll_temac_ethtools_set_coalesce(struct net_device *ndev, } static const struct ethtool_ops temac_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .nway_reset = phy_ethtool_nway_reset, .get_link = ethtool_op_get_link, .get_ts_info = ethtool_op_get_ts_info, diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index c2f4c5ca2e80..e2f3e2b0cec7 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1309,27 +1309,6 @@ static int axienet_ethtools_set_coalesce(struct net_device *ndev, return -EFAULT; } - if ((ecoalesce->rx_coalesce_usecs) || - (ecoalesce->rx_coalesce_usecs_irq) || - (ecoalesce->rx_max_coalesced_frames_irq) || - (ecoalesce->tx_coalesce_usecs) || - (ecoalesce->tx_coalesce_usecs_irq) || - (ecoalesce->tx_max_coalesced_frames_irq) || - (ecoalesce->stats_block_coalesce_usecs) || - (ecoalesce->use_adaptive_rx_coalesce) || - (ecoalesce->use_adaptive_tx_coalesce) || - (ecoalesce->pkt_rate_low) || - (ecoalesce->rx_coalesce_usecs_low) || - (ecoalesce->rx_max_coalesced_frames_low) || - (ecoalesce->tx_coalesce_usecs_low) || - (ecoalesce->tx_max_coalesced_frames_low) || - (ecoalesce->pkt_rate_high) || - (ecoalesce->rx_coalesce_usecs_high) || - (ecoalesce->rx_max_coalesced_frames_high) || - (ecoalesce->tx_coalesce_usecs_high) || - (ecoalesce->tx_max_coalesced_frames_high) || - (ecoalesce->rate_sample_interval)) - return -EOPNOTSUPP; if (ecoalesce->rx_max_coalesced_frames) lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; if (ecoalesce->tx_max_coalesced_frames) @@ -1357,6 +1336,7 @@ axienet_ethtools_set_link_ksettings(struct net_device *ndev, } static const struct ethtool_ops axienet_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = axienet_ethtools_get_drvinfo, .get_regs_len = axienet_ethtools_get_regs_len, .get_regs = axienet_ethtools_get_regs, diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 430c93786153..e04c3b60cae7 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -946,7 +946,8 @@ static int ca8210_spi_transfer( cas_ctl->transfer.bits_per_word = 0; /* Use device setting */ cas_ctl->transfer.tx_buf = cas_ctl->tx_buf; cas_ctl->transfer.rx_buf = cas_ctl->tx_in_buf; - cas_ctl->transfer.delay_usecs = 0; + cas_ctl->transfer.delay.value = 0; + cas_ctl->transfer.delay.unit = SPI_DELAY_UNIT_USECS; cas_ctl->transfer.cs_change = 0; cas_ctl->transfer.len = sizeof(struct mac_message); cas_ctl->msg.complete = ca8210_spi_transfer_complete; diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig index b8cb7cadbf75..9f0d2a93379c 100644 --- a/drivers/net/ipa/Kconfig +++ b/drivers/net/ipa/Kconfig @@ -1,9 +1,9 @@ config QCOM_IPA tristate "Qualcomm IPA support" depends on ARCH_QCOM && 64BIT && NET + depends on QCOM_Q6V5_MSS select QCOM_QMI_HELPERS select QCOM_MDT_LOADER - default QCOM_Q6V5_COMMON help Choose Y or M here to include support for the Qualcomm IP Accelerator (IPA), a hardware block present in some diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 915b4cd05dd2..6de03be28784 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -9,7 +9,6 @@ #include <linux/slab.h> #include <linux/bitfield.h> #include <linux/if_rmnet.h> -#include <linux/version.h> #include <linux/dma-direction.h> #include "gsi.h" @@ -27,8 +26,8 @@ #define IPA_REPLENISH_BATCH 16 -#define IPA_RX_BUFFER_SIZE (PAGE_SIZE << IPA_RX_BUFFER_ORDER) -#define IPA_RX_BUFFER_ORDER 1 /* 8KB endpoint RX buffers (2 pages) */ +/* RX buffer is 1 page (or a power-of-2 contiguous pages) */ +#define IPA_RX_BUFFER_SIZE 8192 /* PAGE_SIZE > 4096 wastes a LOT */ /* The amount of RX buffer space consumed by standard skb overhead */ #define IPA_RX_BUFFER_OVERHEAD (PAGE_SIZE - SKB_MAX_ORDER(NET_SKB_PAD, 0)) @@ -759,7 +758,7 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint) u32 len; int ret; - page = dev_alloc_pages(IPA_RX_BUFFER_ORDER); + page = dev_alloc_pages(get_order(IPA_RX_BUFFER_SIZE)); if (!page) return -ENOMEM; @@ -788,7 +787,7 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint) err_trans_free: gsi_trans_free(trans); err_free_pages: - __free_pages(page, IPA_RX_BUFFER_ORDER); + __free_pages(page, get_order(IPA_RX_BUFFER_SIZE)); return -ENOMEM; } @@ -1074,7 +1073,7 @@ void ipa_endpoint_trans_release(struct ipa_endpoint *endpoint, struct page *page = trans->data; if (page) - __free_pages(page, IPA_RX_BUFFER_ORDER); + __free_pages(page, get_order(IPA_RX_BUFFER_SIZE)); } } diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index d6e7f257e99d..28998dcce3d2 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -942,7 +942,6 @@ static struct platform_driver ipa_driver = { .remove = ipa_remove, .driver = { .name = "ipa", - .owner = THIS_MODULE, .pm = &ipa_pm_ops, .of_match_table = ipa_match, }, diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 30cd0c4f0be0..8801d093135c 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -293,6 +293,7 @@ void ipvlan_process_multicast(struct work_struct *work) } if (dev) dev_put(dev); + cond_resched(); } } @@ -498,19 +499,21 @@ static int ipvlan_process_outbound(struct sk_buff *skb) struct ethhdr *ethh = eth_hdr(skb); int ret = NET_XMIT_DROP; - /* In this mode we dont care about multicast and broadcast traffic */ - if (is_multicast_ether_addr(ethh->h_dest)) { - pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n", - ntohs(skb->protocol)); - kfree_skb(skb); - goto out; - } - /* The ipvlan is a pseudo-L2 device, so the packets that we receive * will have L2; which need to discarded and processed further * in the net-ns of the main-device. */ if (skb_mac_header_was_set(skb)) { + /* In this mode we dont care about + * multicast and broadcast traffic */ + if (is_multicast_ether_addr(ethh->h_dest)) { + pr_debug_ratelimited( + "Dropped {multi|broad}cast of type=[%x]\n", + ntohs(skb->protocol)); + kfree_skb(skb); + goto out; + } + skb_pull(skb, sizeof(*ethh)); skb->mac_header = (typeof(skb->mac_header))~0U; skb_reset_network_header(skb); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index a70662261a5a..f195f278a83a 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -164,7 +164,6 @@ static void ipvlan_uninit(struct net_device *dev) static int ipvlan_open(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); - struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; if (ipvlan->port->mode == IPVLAN_MODE_L3 || @@ -178,7 +177,7 @@ static int ipvlan_open(struct net_device *dev) ipvlan_ht_addr_add(ipvlan, addr); rcu_read_unlock(); - return dev_uc_add(phy_dev, phy_dev->dev_addr); + return 0; } static int ipvlan_stop(struct net_device *dev) @@ -190,8 +189,6 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_unsync(phy_dev, dev); dev_mc_unsync(phy_dev, dev); - dev_uc_del(phy_dev, phy_dev->dev_addr); - rcu_read_lock(); list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_del(addr); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 45bfd99f17fa..49b138e7aeac 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -19,6 +19,7 @@ #include <net/gro_cells.h> #include <net/macsec.h> #include <linux/phy.h> +#include <linux/byteorder/generic.h> #include <uapi/linux/if_macsec.h> @@ -68,6 +69,16 @@ struct macsec_eth_header { sc; \ sc = rtnl_dereference(sc->next)) +#define pn_same_half(pn1, pn2) (!(((pn1) >> 31) ^ ((pn2) >> 31))) + +struct gcm_iv_xpn { + union { + u8 short_secure_channel_id[4]; + ssci_t ssci; + }; + __be64 pn; +} __packed; + struct gcm_iv { union { u8 secure_channel_id[8]; @@ -229,11 +240,13 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) #define MACSEC_PORT_ES (htons(0x0001)) #define MACSEC_PORT_SCB (0x0000) #define MACSEC_UNDEF_SCI ((__force sci_t)0xffffffffffffffffULL) +#define MACSEC_UNDEF_SSCI ((__force ssci_t)0xffffffff) #define MACSEC_GCM_AES_128_SAK_LEN 16 #define MACSEC_GCM_AES_256_SAK_LEN 32 #define DEFAULT_SAK_LEN MACSEC_GCM_AES_128_SAK_LEN +#define DEFAULT_XPN false #define DEFAULT_SEND_SCI true #define DEFAULT_ENCRYPT false #define DEFAULT_ENCODING_SA 0 @@ -372,8 +385,8 @@ static const struct macsec_ops *macsec_get_ops(struct macsec_dev *macsec, return __macsec_get_ops(macsec->offload, macsec, ctx); } -/* validate MACsec packet according to IEEE 802.1AE-2006 9.12 */ -static bool macsec_validate_skb(struct sk_buff *skb, u16 icv_len) +/* validate MACsec packet according to IEEE 802.1AE-2018 9.12 */ +static bool macsec_validate_skb(struct sk_buff *skb, u16 icv_len, bool xpn) { struct macsec_eth_header *h = (struct macsec_eth_header *)skb->data; int len = skb->len - 2 * ETH_ALEN; @@ -398,8 +411,8 @@ static bool macsec_validate_skb(struct sk_buff *skb, u16 icv_len) if (h->unused) return false; - /* rx.pn != 0 (figure 10-5) */ - if (!h->packet_number) + /* rx.pn != 0 if not XPN (figure 10-5 with 802.11AEbw-2013 amendment) */ + if (!h->packet_number && !xpn) return false; /* length check, f) g) h) i) */ @@ -411,6 +424,15 @@ static bool macsec_validate_skb(struct sk_buff *skb, u16 icv_len) #define MACSEC_NEEDED_HEADROOM (macsec_extra_len(true)) #define MACSEC_NEEDED_TAILROOM MACSEC_STD_ICV_LEN +static void macsec_fill_iv_xpn(unsigned char *iv, ssci_t ssci, u64 pn, + salt_t salt) +{ + struct gcm_iv_xpn *gcm_iv = (struct gcm_iv_xpn *)iv; + + gcm_iv->ssci = ssci ^ salt.ssci; + gcm_iv->pn = cpu_to_be64(pn) ^ salt.pn; +} + static void macsec_fill_iv(unsigned char *iv, sci_t sci, u32 pn) { struct gcm_iv *gcm_iv = (struct gcm_iv *)iv; @@ -424,6 +446,11 @@ static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb) return (struct macsec_eth_header *)skb_mac_header(skb); } +static sci_t dev_to_sci(struct net_device *dev, __be16 port) +{ + return make_sci(dev->dev_addr, port); +} + static void __macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa) { @@ -441,14 +468,19 @@ void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa) } EXPORT_SYMBOL_GPL(macsec_pn_wrapped); -static u32 tx_sa_update_pn(struct macsec_tx_sa *tx_sa, struct macsec_secy *secy) +static pn_t tx_sa_update_pn(struct macsec_tx_sa *tx_sa, + struct macsec_secy *secy) { - u32 pn; + pn_t pn; spin_lock_bh(&tx_sa->lock); - pn = tx_sa->next_pn; - tx_sa->next_pn++; + pn = tx_sa->next_pn_halves; + if (secy->xpn) + tx_sa->next_pn++; + else + tx_sa->next_pn_halves.lower++; + if (tx_sa->next_pn == 0) __macsec_pn_wrapped(secy, tx_sa); spin_unlock_bh(&tx_sa->lock); @@ -563,7 +595,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, struct macsec_tx_sa *tx_sa; struct macsec_dev *macsec = macsec_priv(dev); bool sci_present; - u32 pn; + pn_t pn; secy = &macsec->secy; tx_sc = &secy->tx_sc; @@ -605,12 +637,12 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, memmove(hh, eth, 2 * ETH_ALEN); pn = tx_sa_update_pn(tx_sa, secy); - if (pn == 0) { + if (pn.full64 == 0) { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOLINK); } - macsec_fill_sectag(hh, secy, pn, sci_present); + macsec_fill_sectag(hh, secy, pn.lower, sci_present); macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN); skb_put(skb, secy->icv_len); @@ -641,7 +673,10 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, return ERR_PTR(-ENOMEM); } - macsec_fill_iv(iv, secy->sci, pn); + if (secy->xpn) + macsec_fill_iv_xpn(iv, tx_sa->ssci, pn.full64, tx_sa->key.salt); + else + macsec_fill_iv(iv, secy->sci, pn.lower); sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); @@ -693,13 +728,14 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u u32 lowest_pn = 0; spin_lock(&rx_sa->lock); - if (rx_sa->next_pn >= secy->replay_window) - lowest_pn = rx_sa->next_pn - secy->replay_window; + if (rx_sa->next_pn_halves.lower >= secy->replay_window) + lowest_pn = rx_sa->next_pn_halves.lower - secy->replay_window; /* Now perform replay protection check again * (see IEEE 802.1AE-2006 figure 10-5) */ - if (secy->replay_protect && pn < lowest_pn) { + if (secy->replay_protect && pn < lowest_pn && + (!secy->xpn || pn_same_half(pn, lowest_pn))) { spin_unlock(&rx_sa->lock); u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsLate++; @@ -748,8 +784,15 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u } u64_stats_update_end(&rxsc_stats->syncp); - if (pn >= rx_sa->next_pn) - rx_sa->next_pn = pn + 1; + // Instead of "pn >=" - to support pn overflow in xpn + if (pn + 1 > rx_sa->next_pn_halves.lower) { + rx_sa->next_pn_halves.lower = pn + 1; + } else if (secy->xpn && + !pn_same_half(pn, rx_sa->next_pn_halves.lower)) { + rx_sa->next_pn_halves.upper++; + rx_sa->next_pn_halves.lower = pn + 1; + } + spin_unlock(&rx_sa->lock); } @@ -836,6 +879,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, unsigned char *iv; struct aead_request *req; struct macsec_eth_header *hdr; + u32 hdr_pn; u16 icv_len = secy->icv_len; macsec_skb_cb(skb)->valid = false; @@ -855,7 +899,21 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, } hdr = (struct macsec_eth_header *)skb->data; - macsec_fill_iv(iv, sci, ntohl(hdr->packet_number)); + hdr_pn = ntohl(hdr->packet_number); + + if (secy->xpn) { + pn_t recovered_pn = rx_sa->next_pn_halves; + + recovered_pn.lower = hdr_pn; + if (hdr_pn < rx_sa->next_pn_halves.lower && + !pn_same_half(hdr_pn, rx_sa->next_pn_halves.lower)) + recovered_pn.upper++; + + macsec_fill_iv_xpn(iv, rx_sa->ssci, recovered_pn.full64, + rx_sa->key.salt); + } else { + macsec_fill_iv(iv, sci, hdr_pn); + } sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); @@ -996,7 +1054,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) struct macsec_rxh_data *rxd; struct macsec_dev *macsec; sci_t sci; - u32 pn; + u32 hdr_pn; bool cbit; struct pcpu_rx_sc_stats *rxsc_stats; struct pcpu_secy_stats *secy_stats; @@ -1067,7 +1125,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) secy_stats = this_cpu_ptr(macsec->stats); rxsc_stats = this_cpu_ptr(rx_sc->stats); - if (!macsec_validate_skb(skb, secy->icv_len)) { + if (!macsec_validate_skb(skb, secy->icv_len, secy->xpn)) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsBadTag++; u64_stats_update_end(&secy_stats->syncp); @@ -1099,13 +1157,16 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) } /* First, PN check to avoid decrypting obviously wrong packets */ - pn = ntohl(hdr->packet_number); + hdr_pn = ntohl(hdr->packet_number); if (secy->replay_protect) { bool late; spin_lock(&rx_sa->lock); - late = rx_sa->next_pn >= secy->replay_window && - pn < (rx_sa->next_pn - secy->replay_window); + late = rx_sa->next_pn_halves.lower >= secy->replay_window && + hdr_pn < (rx_sa->next_pn_halves.lower - secy->replay_window); + + if (secy->xpn) + late = late && pn_same_half(rx_sa->next_pn_halves.lower, hdr_pn); spin_unlock(&rx_sa->lock); if (late) { @@ -1134,7 +1195,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; } - if (!macsec_post_decrypt(skb, secy, pn)) + if (!macsec_post_decrypt(skb, secy, hdr_pn)) goto drop; deliver: @@ -1252,6 +1313,7 @@ static int init_rx_sa(struct macsec_rx_sa *rx_sa, char *sak, int key_len, return PTR_ERR(rx_sa->key.tfm); } + rx_sa->ssci = MACSEC_UNDEF_SSCI; rx_sa->active = false; rx_sa->next_pn = 1; refcount_set(&rx_sa->refcnt, 1); @@ -1350,6 +1412,7 @@ static int init_tx_sa(struct macsec_tx_sa *tx_sa, char *sak, int key_len, return PTR_ERR(tx_sa->key.tfm); } + tx_sa->ssci = MACSEC_UNDEF_SSCI; tx_sa->active = false; refcount_set(&tx_sa->refcnt, 1); spin_lock_init(&tx_sa->lock); @@ -1393,6 +1456,16 @@ static int nla_put_sci(struct sk_buff *skb, int attrtype, sci_t value, return nla_put_u64_64bit(skb, attrtype, (__force u64)value, padattr); } +static ssci_t nla_get_ssci(const struct nlattr *nla) +{ + return (__force ssci_t)nla_get_u32(nla); +} + +static int nla_put_ssci(struct sk_buff *skb, int attrtype, ssci_t value) +{ + return nla_put_u32(skb, attrtype, (__force u64)value); +} + static struct macsec_tx_sa *get_txsa_from_nl(struct net *net, struct nlattr **attrs, struct nlattr **tb_sa, @@ -1508,11 +1581,14 @@ static const struct nla_policy macsec_genl_rxsc_policy[NUM_MACSEC_RXSC_ATTR] = { static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = { [MACSEC_SA_ATTR_AN] = { .type = NLA_U8 }, [MACSEC_SA_ATTR_ACTIVE] = { .type = NLA_U8 }, - [MACSEC_SA_ATTR_PN] = { .type = NLA_U32 }, + [MACSEC_SA_ATTR_PN] = { .type = NLA_MIN_LEN, .len = 4 }, [MACSEC_SA_ATTR_KEYID] = { .type = NLA_BINARY, .len = MACSEC_KEYID_LEN, }, [MACSEC_SA_ATTR_KEY] = { .type = NLA_BINARY, .len = MACSEC_MAX_KEY_LEN, }, + [MACSEC_SA_ATTR_SSCI] = { .type = NLA_U32 }, + [MACSEC_SA_ATTR_SALT] = { .type = NLA_BINARY, + .len = MACSEC_SALT_LEN, }, }; static const struct nla_policy macsec_genl_offload_policy[NUM_MACSEC_OFFLOAD_ATTR] = { @@ -1585,7 +1661,8 @@ static bool validate_add_rxsa(struct nlattr **attrs) if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; - if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) + if (attrs[MACSEC_SA_ATTR_PN] && + *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { @@ -1607,6 +1684,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; unsigned char assoc_num; + int pn_len; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; int err; @@ -1639,6 +1717,29 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; + if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { + pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); + rtnl_unlock(); + return -EINVAL; + } + + if (secy->xpn) { + if (!tb_sa[MACSEC_SA_ATTR_SSCI] || !tb_sa[MACSEC_SA_ATTR_SALT]) { + rtnl_unlock(); + return -EINVAL; + } + + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { + pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), + MACSEC_SA_ATTR_SALT); + rtnl_unlock(); + return -EINVAL; + } + } + rx_sa = rtnl_dereference(rx_sc->sa[assoc_num]); if (rx_sa) { rtnl_unlock(); @@ -1661,7 +1762,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) if (tb_sa[MACSEC_SA_ATTR_PN]) { spin_lock_bh(&rx_sa->lock); - rx_sa->next_pn = nla_get_u32(tb_sa[MACSEC_SA_ATTR_PN]); + rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&rx_sa->lock); } @@ -1691,6 +1792,12 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) goto cleanup; } + if (secy->xpn) { + rx_sa->ssci = nla_get_ssci(tb_sa[MACSEC_SA_ATTR_SSCI]); + nla_memcpy(rx_sa->key.salt.bytes, tb_sa[MACSEC_SA_ATTR_SALT], + MACSEC_SALT_LEN); + } + nla_memcpy(rx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEYID], MACSEC_KEYID_LEN); rcu_assign_pointer(rx_sc->sa[assoc_num], rx_sa); @@ -1815,6 +1922,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; unsigned char assoc_num; + int pn_len; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; bool was_operational; int err; @@ -1847,6 +1955,29 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; + if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { + pr_notice("macsec: nl: add_txsa: bad pn length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); + rtnl_unlock(); + return -EINVAL; + } + + if (secy->xpn) { + if (!tb_sa[MACSEC_SA_ATTR_SSCI] || !tb_sa[MACSEC_SA_ATTR_SALT]) { + rtnl_unlock(); + return -EINVAL; + } + + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { + pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), + MACSEC_SA_ATTR_SALT); + rtnl_unlock(); + return -EINVAL; + } + } + tx_sa = rtnl_dereference(tx_sc->sa[assoc_num]); if (tx_sa) { rtnl_unlock(); @@ -1868,7 +1999,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) } spin_lock_bh(&tx_sa->lock); - tx_sa->next_pn = nla_get_u32(tb_sa[MACSEC_SA_ATTR_PN]); + tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&tx_sa->lock); if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) @@ -1899,6 +2030,12 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) goto cleanup; } + if (secy->xpn) { + tx_sa->ssci = nla_get_ssci(tb_sa[MACSEC_SA_ATTR_SSCI]); + nla_memcpy(tx_sa->key.salt.bytes, tb_sa[MACSEC_SA_ATTR_SALT], + MACSEC_SALT_LEN); + } + nla_memcpy(tx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEYID], MACSEC_KEYID_LEN); rcu_assign_pointer(tx_sc->sa[assoc_num], tx_sa); @@ -2105,7 +2242,9 @@ static bool validate_upd_sa(struct nlattr **attrs) { if (!attrs[MACSEC_SA_ATTR_AN] || attrs[MACSEC_SA_ATTR_KEY] || - attrs[MACSEC_SA_ATTR_KEYID]) + attrs[MACSEC_SA_ATTR_KEYID] || + attrs[MACSEC_SA_ATTR_SSCI] || + attrs[MACSEC_SA_ATTR_SALT]) return false; if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) @@ -2132,9 +2271,11 @@ static int macsec_upd_txsa(struct sk_buff *skb, struct genl_info *info) u8 assoc_num; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; bool was_operational, was_active; - u32 prev_pn = 0; + pn_t prev_pn; int ret = 0; + prev_pn.full64 = 0; + if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; @@ -2153,9 +2294,19 @@ static int macsec_upd_txsa(struct sk_buff *skb, struct genl_info *info) } if (tb_sa[MACSEC_SA_ATTR_PN]) { + int pn_len; + + pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; + if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { + pr_notice("macsec: nl: upd_txsa: bad pn length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); + rtnl_unlock(); + return -EINVAL; + } + spin_lock_bh(&tx_sa->lock); - prev_pn = tx_sa->next_pn; - tx_sa->next_pn = nla_get_u32(tb_sa[MACSEC_SA_ATTR_PN]); + prev_pn = tx_sa->next_pn_halves; + tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&tx_sa->lock); } @@ -2193,7 +2344,7 @@ static int macsec_upd_txsa(struct sk_buff *skb, struct genl_info *info) cleanup: if (tb_sa[MACSEC_SA_ATTR_PN]) { spin_lock_bh(&tx_sa->lock); - tx_sa->next_pn = prev_pn; + tx_sa->next_pn_halves = prev_pn; spin_unlock_bh(&tx_sa->lock); } tx_sa->active = was_active; @@ -2213,9 +2364,11 @@ static int macsec_upd_rxsa(struct sk_buff *skb, struct genl_info *info) struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; bool was_active; - u32 prev_pn = 0; + pn_t prev_pn; int ret = 0; + prev_pn.full64 = 0; + if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; @@ -2237,9 +2390,19 @@ static int macsec_upd_rxsa(struct sk_buff *skb, struct genl_info *info) } if (tb_sa[MACSEC_SA_ATTR_PN]) { + int pn_len; + + pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; + if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { + pr_notice("macsec: nl: upd_rxsa: bad pn length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); + rtnl_unlock(); + return -EINVAL; + } + spin_lock_bh(&rx_sa->lock); - prev_pn = rx_sa->next_pn; - rx_sa->next_pn = nla_get_u32(tb_sa[MACSEC_SA_ATTR_PN]); + prev_pn = rx_sa->next_pn_halves; + rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&rx_sa->lock); } @@ -2272,7 +2435,7 @@ static int macsec_upd_rxsa(struct sk_buff *skb, struct genl_info *info) cleanup: if (tb_sa[MACSEC_SA_ATTR_PN]) { spin_lock_bh(&rx_sa->lock); - rx_sa->next_pn = prev_pn; + rx_sa->next_pn_halves = prev_pn; spin_unlock_bh(&rx_sa->lock); } rx_sa->active = was_active; @@ -2686,10 +2849,10 @@ static int nla_put_secy(struct macsec_secy *secy, struct sk_buff *skb) switch (secy->key_len) { case MACSEC_GCM_AES_128_SAK_LEN: - csid = MACSEC_DEFAULT_CIPHER_ID; + csid = secy->xpn ? MACSEC_CIPHER_ID_GCM_AES_XPN_128 : MACSEC_DEFAULT_CIPHER_ID; break; case MACSEC_GCM_AES_256_SAK_LEN: - csid = MACSEC_CIPHER_ID_GCM_AES_256; + csid = secy->xpn ? MACSEC_CIPHER_ID_GCM_AES_XPN_256 : MACSEC_CIPHER_ID_GCM_AES_256; break; default: goto cancel; @@ -2780,6 +2943,8 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, for (i = 0, j = 1; i < MACSEC_NUM_AN; i++) { struct macsec_tx_sa *tx_sa = rtnl_dereference(tx_sc->sa[i]); struct nlattr *txsa_nest; + u64 pn; + int pn_len; if (!tx_sa) continue; @@ -2790,9 +2955,18 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, goto nla_put_failure; } + if (secy->xpn) { + pn = tx_sa->next_pn; + pn_len = MACSEC_XPN_PN_LEN; + } else { + pn = tx_sa->next_pn_halves.lower; + pn_len = MACSEC_DEFAULT_PN_LEN; + } + if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) || - nla_put_u32(skb, MACSEC_SA_ATTR_PN, tx_sa->next_pn) || + nla_put(skb, MACSEC_SA_ATTR_PN, pn_len, &pn) || nla_put(skb, MACSEC_SA_ATTR_KEYID, MACSEC_KEYID_LEN, tx_sa->key.id) || + (secy->xpn && nla_put_ssci(skb, MACSEC_SA_ATTR_SSCI, tx_sa->ssci)) || nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, tx_sa->active)) { nla_nest_cancel(skb, txsa_nest); nla_nest_cancel(skb, txsa_list); @@ -2865,6 +3039,8 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, for (i = 0, k = 1; i < MACSEC_NUM_AN; i++) { struct macsec_rx_sa *rx_sa = rtnl_dereference(rx_sc->sa[i]); struct nlattr *rxsa_nest; + u64 pn; + int pn_len; if (!rx_sa) continue; @@ -2894,9 +3070,18 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, } nla_nest_end(skb, attr); + if (secy->xpn) { + pn = rx_sa->next_pn; + pn_len = MACSEC_XPN_PN_LEN; + } else { + pn = rx_sa->next_pn_halves.lower; + pn_len = MACSEC_DEFAULT_PN_LEN; + } + if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) || - nla_put_u32(skb, MACSEC_SA_ATTR_PN, rx_sa->next_pn) || + nla_put(skb, MACSEC_SA_ATTR_PN, pn_len, &pn) || nla_put(skb, MACSEC_SA_ATTR_KEYID, MACSEC_KEYID_LEN, rx_sa->key.id) || + (secy->xpn && nla_put_ssci(skb, MACSEC_SA_ATTR_SSCI, rx_sa->ssci)) || nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, rx_sa->active)) { nla_nest_cancel(skb, rxsa_nest); nla_nest_cancel(skb, rxsc_nest); @@ -3268,6 +3453,20 @@ static int macsec_set_mac_address(struct net_device *dev, void *p) out: ether_addr_copy(dev->dev_addr, addr->sa_data); + macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES); + + /* If h/w offloading is available, propagate to the device */ + if (macsec_is_offloaded(macsec)) { + const struct macsec_ops *ops; + struct macsec_context ctx; + + ops = macsec_get_ops(macsec, &ctx); + if (ops) { + ctx.secy = &macsec->secy; + macsec_offload(ops->mdo_upd_secy, &ctx); + } + } + return 0; } @@ -3342,6 +3541,7 @@ static const struct device_type macsec_type = { static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCI] = { .type = NLA_U64 }, + [IFLA_MACSEC_PORT] = { .type = NLA_U16 }, [IFLA_MACSEC_ICV_LEN] = { .type = NLA_U8 }, [IFLA_MACSEC_CIPHER_SUITE] = { .type = NLA_U64 }, [IFLA_MACSEC_WINDOW] = { .type = NLA_U32 }, @@ -3425,9 +3625,19 @@ static int macsec_changelink_common(struct net_device *dev, case MACSEC_CIPHER_ID_GCM_AES_128: case MACSEC_DEFAULT_CIPHER_ID: secy->key_len = MACSEC_GCM_AES_128_SAK_LEN; + secy->xpn = false; break; case MACSEC_CIPHER_ID_GCM_AES_256: secy->key_len = MACSEC_GCM_AES_256_SAK_LEN; + secy->xpn = false; + break; + case MACSEC_CIPHER_ID_GCM_AES_XPN_128: + secy->key_len = MACSEC_GCM_AES_128_SAK_LEN; + secy->xpn = true; + break; + case MACSEC_CIPHER_ID_GCM_AES_XPN_256: + secy->key_len = MACSEC_GCM_AES_256_SAK_LEN; + secy->xpn = true; break; default: return -EINVAL; @@ -3592,11 +3802,6 @@ static bool sci_exists(struct net_device *dev, sci_t sci) return false; } -static sci_t dev_to_sci(struct net_device *dev, __be16 port) -{ - return make_sci(dev->dev_addr, port); -} - static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) { struct macsec_dev *macsec = macsec_priv(dev); @@ -3622,6 +3827,7 @@ static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) secy->validate_frames = MACSEC_VALIDATE_DEFAULT; secy->protect_frames = true; secy->replay_protect = false; + secy->xpn = DEFAULT_XPN; secy->sci = sci; secy->tx_sc.active = true; @@ -3751,6 +3957,8 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[], switch (csid) { case MACSEC_CIPHER_ID_GCM_AES_128: case MACSEC_CIPHER_ID_GCM_AES_256: + case MACSEC_CIPHER_ID_GCM_AES_XPN_128: + case MACSEC_CIPHER_ID_GCM_AES_XPN_256: case MACSEC_DEFAULT_CIPHER_ID: if (icv_len < MACSEC_MIN_ICV_LEN || icv_len > MACSEC_STD_ICV_LEN) @@ -3824,10 +4032,10 @@ static int macsec_fill_info(struct sk_buff *skb, switch (secy->key_len) { case MACSEC_GCM_AES_128_SAK_LEN: - csid = MACSEC_DEFAULT_CIPHER_ID; + csid = secy->xpn ? MACSEC_CIPHER_ID_GCM_AES_XPN_128 : MACSEC_DEFAULT_CIPHER_ID; break; case MACSEC_GCM_AES_256_SAK_LEN: - csid = MACSEC_CIPHER_ID_GCM_AES_256; + csid = secy->xpn ? MACSEC_CIPHER_ID_GCM_AES_XPN_256 : MACSEC_CIPHER_ID_GCM_AES_256; break; default: goto nla_put_failure; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 81aa7adf4801..e7289d67268f 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -334,6 +334,8 @@ static void macvlan_process_broadcast(struct work_struct *w) if (src) dev_put(src->dev); consume_skb(skb); + + cond_resched(); } } diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index f81c47377f32..7bfd0622cef1 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -381,22 +381,29 @@ enum { #define NSIM_TRAP_DROP(_id, _group_id) \ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ NSIM_TRAP_METADATA) #define NSIM_TRAP_DROP_EXT(_id, _group_id, _metadata) \ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ NSIM_TRAP_METADATA | (_metadata)) #define NSIM_TRAP_EXCEPTION(_id, _group_id) \ DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ NSIM_TRAP_METADATA) #define NSIM_TRAP_DRIVER_EXCEPTION(_id, _group_id) \ DEVLINK_TRAP_DRIVER(EXCEPTION, TRAP, NSIM_TRAP_ID_##_id, \ NSIM_TRAP_NAME_##_id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ NSIM_TRAP_METADATA) +static const struct devlink_trap_group nsim_trap_groups_arr[] = { + DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS), + DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS), + DEVLINK_TRAP_GROUP_GENERIC(BUFFER_DROPS), + DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS), +}; + static const struct devlink_trap nsim_traps_arr[] = { NSIM_TRAP_DROP(SMAC_MC, L2_DROPS), NSIM_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), @@ -556,10 +563,15 @@ static int nsim_dev_traps_init(struct devlink *devlink) nsim_trap_data->nsim_dev = nsim_dev; nsim_dev->trap_data = nsim_trap_data; + err = devlink_trap_groups_register(devlink, nsim_trap_groups_arr, + ARRAY_SIZE(nsim_trap_groups_arr)); + if (err) + goto err_trap_items_free; + err = devlink_traps_register(devlink, nsim_traps_arr, ARRAY_SIZE(nsim_traps_arr), NULL); if (err) - goto err_trap_items_free; + goto err_trap_groups_unregister; INIT_DELAYED_WORK(&nsim_dev->trap_data->trap_report_dw, nsim_dev_trap_report_work); @@ -568,6 +580,9 @@ static int nsim_dev_traps_init(struct devlink *devlink) return 0; +err_trap_groups_unregister: + devlink_trap_groups_unregister(devlink, nsim_trap_groups_arr, + ARRAY_SIZE(nsim_trap_groups_arr)); err_trap_items_free: kfree(nsim_trap_data->trap_items_arr); err_trap_data_free: @@ -582,6 +597,8 @@ static void nsim_dev_traps_exit(struct devlink *devlink) cancel_delayed_work_sync(&nsim_dev->trap_data->trap_report_dw); devlink_traps_unregister(devlink, nsim_traps_arr, ARRAY_SIZE(nsim_traps_arr)); + devlink_trap_groups_unregister(devlink, nsim_trap_groups_arr, + ARRAY_SIZE(nsim_trap_groups_arr)); kfree(nsim_dev->trap_data->trap_items_arr); kfree(nsim_dev->trap_data); } diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 26f8039f300f..70774ab474e6 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -89,7 +89,7 @@ obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o obj-$(CONFIG_MICREL_PHY) += micrel.o obj-$(CONFIG_MICROCHIP_PHY) += microchip.o obj-$(CONFIG_MICROCHIP_T1_PHY) += microchip_t1.o -obj-$(CONFIG_MICROSEMI_PHY) += mscc.o +obj-$(CONFIG_MICROSEMI_PHY) += mscc/ obj-$(CONFIG_NATIONAL_PHY) += national.o obj-$(CONFIG_NXP_TJA11XX_PHY) += nxp-tja11xx.o obj-$(CONFIG_QSEMI_PHY) += qsemi.o diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c index 31927b2c7d5a..41e7c1432497 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia_main.c @@ -290,17 +290,6 @@ static int aqr_read_status(struct phy_device *phydev) return genphy_c45_read_status(phydev); } -static int aqr107_read_downshift_event(struct phy_device *phydev) -{ - int val; - - val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_INT_STATUS1); - if (val < 0) - return val; - - return !!(val & MDIO_AN_TX_VEND_INT_STATUS1_DOWNSHIFT); -} - static int aqr107_read_rate(struct phy_device *phydev) { int val; @@ -377,13 +366,7 @@ static int aqr107_read_status(struct phy_device *phydev) break; } - val = aqr107_read_downshift_event(phydev); - if (val <= 0) - return val; - - phydev_warn(phydev, "Downshift occurred! Cabling may be defective.\n"); - - /* Read downshifted rate from vendor register */ + /* Read possibly downshifted rate from vendor register */ return aqr107_read_rate(phydev); } @@ -451,16 +434,11 @@ static int aqr107_set_tunable(struct phy_device *phydev, */ static int aqr107_wait_reset_complete(struct phy_device *phydev) { - int val, retries = 100; - - do { - val = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_FW_ID); - if (val < 0) - return val; - msleep(20); - } while (!val && --retries); + int val; - return val ? 0 : -ETIMEDOUT; + return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_FW_ID, val, val != 0, + 20000, 2000000, false); } static void aqr107_chip_info(struct phy_device *phydev) @@ -506,9 +484,6 @@ static int aqr107_config_init(struct phy_device *phydev) if (!ret) aqr107_chip_info(phydev); - /* ensure that a latched downshift event is cleared */ - aqr107_read_downshift_event(phydev); - return aqr107_set_downshift(phydev, MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT); } @@ -533,9 +508,6 @@ static int aqcs109_config_init(struct phy_device *phydev) if (ret) return ret; - /* ensure that a latched downshift event is cleared */ - aqr107_read_downshift_event(phydev); - return aqr107_set_downshift(phydev, MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT); } diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index 23f1958ba6ad..459fb2069c7e 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -73,6 +73,7 @@ static struct phy_driver bcm63xx_driver[] = { /* same phy as above, with just a different OUI */ .phy_id = 0x002bdc00, .phy_id_mask = 0xfffffc00, + .name = "Broadcom BCM63XX (2)", /* PHY_BASIC_FEATURES */ .flags = PHY_IS_INTERNAL, .config_init = bcm63xx_config_init, diff --git a/drivers/net/phy/bcm84881.c b/drivers/net/phy/bcm84881.c index 14d55a77eb28..3840d2adbbb9 100644 --- a/drivers/net/phy/bcm84881.c +++ b/drivers/net/phy/bcm84881.c @@ -22,30 +22,11 @@ enum { static int bcm84881_wait_init(struct phy_device *phydev) { - unsigned int tries = 20; - int ret, val; - - do { - val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1); - if (val < 0) { - ret = val; - break; - } - if (!(val & MDIO_CTRL1_RESET)) { - ret = 0; - break; - } - if (!--tries) { - ret = -ETIMEDOUT; - break; - } - msleep(100); - } while (1); + int val; - if (ret) - phydev_err(phydev, "%s failed: %d\n", __func__, ret); - - return ret; + return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1, + val, !(val & MDIO_CTRL1_RESET), + 100000, 2000000, false); } static int bcm84881_config_init(struct phy_device *phydev) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 9a8badafea8a..4714ca0e0d4b 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -867,21 +867,6 @@ static int m88e1011_set_tunable(struct phy_device *phydev, } } -static void m88e1011_link_change_notify(struct phy_device *phydev) -{ - int status; - - if (phydev->state != PHY_RUNNING) - return; - - /* we may be on fiber page currently */ - status = phy_read_paged(phydev, MII_MARVELL_COPPER_PAGE, - MII_M1011_PHY_SSR); - - if (status > 0 && status & MII_M1011_PHY_SSR_DOWNSHIFT) - phydev_warn(phydev, "Downshift occurred! Cabling may be defective.\n"); -} - static int m88e1116r_config_init(struct phy_device *phydev) { int err; @@ -2201,7 +2186,6 @@ static struct phy_driver marvell_drivers[] = { .get_stats = marvell_get_stats, .get_tunable = m88e1011_get_tunable, .set_tunable = m88e1011_set_tunable, - .link_change_notify = m88e1011_link_change_notify, }, { .phy_id = MARVELL_PHY_ID_88E1111, @@ -2223,7 +2207,6 @@ static struct phy_driver marvell_drivers[] = { .get_stats = marvell_get_stats, .get_tunable = m88e1111_get_tunable, .set_tunable = m88e1111_set_tunable, - .link_change_notify = m88e1011_link_change_notify, }, { .phy_id = MARVELL_PHY_ID_88E1118, @@ -2264,7 +2247,6 @@ static struct phy_driver marvell_drivers[] = { .get_stats = marvell_get_stats, .get_tunable = m88e1011_get_tunable, .set_tunable = m88e1011_set_tunable, - .link_change_notify = m88e1011_link_change_notify, }, { .phy_id = MARVELL_PHY_ID_88E1318S, @@ -2308,7 +2290,6 @@ static struct phy_driver marvell_drivers[] = { .get_stats = marvell_get_stats, .get_tunable = m88e1111_get_tunable, .set_tunable = m88e1111_set_tunable, - .link_change_notify = m88e1011_link_change_notify, }, { .phy_id = MARVELL_PHY_ID_88E1149R, @@ -2364,7 +2345,6 @@ static struct phy_driver marvell_drivers[] = { .get_stats = marvell_get_stats, .get_tunable = m88e1011_get_tunable, .set_tunable = m88e1011_set_tunable, - .link_change_notify = m88e1011_link_change_notify, }, { .phy_id = MARVELL_PHY_ID_88E1510, @@ -2390,7 +2370,6 @@ static struct phy_driver marvell_drivers[] = { .set_loopback = genphy_loopback, .get_tunable = m88e1011_get_tunable, .set_tunable = m88e1011_set_tunable, - .link_change_notify = m88e1011_link_change_notify, }, { .phy_id = MARVELL_PHY_ID_88E1540, @@ -2413,7 +2392,6 @@ static struct phy_driver marvell_drivers[] = { .get_stats = marvell_get_stats, .get_tunable = m88e1540_get_tunable, .set_tunable = m88e1540_set_tunable, - .link_change_notify = m88e1011_link_change_notify, }, { .phy_id = MARVELL_PHY_ID_88E1545, @@ -2436,7 +2414,6 @@ static struct phy_driver marvell_drivers[] = { .get_stats = marvell_get_stats, .get_tunable = m88e1540_get_tunable, .set_tunable = m88e1540_set_tunable, - .link_change_notify = m88e1011_link_change_notify, }, { .phy_id = MARVELL_PHY_ID_88E3016, @@ -2479,7 +2456,6 @@ static struct phy_driver marvell_drivers[] = { .get_stats = marvell_get_stats, .get_tunable = m88e1540_get_tunable, .set_tunable = m88e1540_set_tunable, - .link_change_notify = m88e1011_link_change_notify, }, }; diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 7e05b92504f0..7621badae64d 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -241,22 +241,17 @@ static int mv3310_power_up(struct phy_device *phydev) static int mv3310_reset(struct phy_device *phydev, u32 unit) { - int retries, val, err; + int val, err; err = phy_modify_mmd(phydev, MDIO_MMD_PCS, unit + MDIO_CTRL1, MDIO_CTRL1_RESET, MDIO_CTRL1_RESET); if (err < 0) return err; - retries = 20; - do { - msleep(5); - val = phy_read_mmd(phydev, MDIO_MMD_PCS, unit + MDIO_CTRL1); - if (val < 0) - return val; - } while (val & MDIO_CTRL1_RESET && --retries); - - return val & MDIO_CTRL1_RESET ? -ETIMEDOUT : 0; + return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_PCS, + unit + MDIO_CTRL1, val, + !(val & MDIO_CTRL1_RESET), + 5000, 100000, true); } static int mv3310_get_edpd(struct phy_device *phydev, u16 *edpd) diff --git a/drivers/net/phy/mdio-xpcs.c b/drivers/net/phy/mdio-xpcs.c index 973f588146f7..0d66a8ba7eb6 100644 --- a/drivers/net/phy/mdio-xpcs.c +++ b/drivers/net/phy/mdio-xpcs.c @@ -14,6 +14,7 @@ #define SYNOPSYS_XPCS_USXGMII_ID 0x7996ced0 #define SYNOPSYS_XPCS_10GKR_ID 0x7996ced0 +#define SYNOPSYS_XPCS_XLGMII_ID 0x7996ced0 #define SYNOPSYS_XPCS_MASK 0xffffffff /* Vendor regs access */ @@ -74,6 +75,36 @@ static const int xpcs_10gkr_features[] = { __ETHTOOL_LINK_MODE_MASK_NBITS, }; +static const int xpcs_xlgmii_features[] = { + ETHTOOL_LINK_MODE_Pause_BIT, + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT, + __ETHTOOL_LINK_MODE_MASK_NBITS, +}; + static const phy_interface_t xpcs_usxgmii_interfaces[] = { PHY_INTERFACE_MODE_USXGMII, PHY_INTERFACE_MODE_MAX, @@ -84,6 +115,11 @@ static const phy_interface_t xpcs_10gkr_interfaces[] = { PHY_INTERFACE_MODE_MAX, }; +static const phy_interface_t xpcs_xlgmii_interfaces[] = { + PHY_INTERFACE_MODE_XLGMII, + PHY_INTERFACE_MODE_MAX, +}; + static struct xpcs_id { u32 id; u32 mask; @@ -100,6 +136,11 @@ static struct xpcs_id { .mask = SYNOPSYS_XPCS_MASK, .supported = xpcs_10gkr_features, .interface = xpcs_10gkr_interfaces, + }, { + .id = SYNOPSYS_XPCS_XLGMII_ID, + .mask = SYNOPSYS_XPCS_MASK, + .supported = xpcs_xlgmii_features, + .interface = xpcs_xlgmii_interfaces, }, }; @@ -214,8 +255,10 @@ static int xpcs_read_fault(struct mdio_xpcs_args *xpcs, if (ret < 0) return ret; - if (ret & MDIO_PCS_10GBRT_STAT2_ERR) + if (ret & MDIO_PCS_10GBRT_STAT2_ERR) { xpcs_warn(xpcs, state, "Link has errors!\n"); + return -EFAULT; + } return 0; } @@ -390,8 +433,10 @@ static int xpcs_aneg_done(struct mdio_xpcs_args *xpcs, return ret; /* Check if Aneg outcome is valid */ - if (!(ret & DW_C73_AN_ADV_SF)) + if (!(ret & DW_C73_AN_ADV_SF)) { + xpcs_config_aneg(xpcs); return 0; + } return 1; } @@ -458,6 +503,60 @@ static void xpcs_resolve_lpa(struct mdio_xpcs_args *xpcs, state->duplex = DUPLEX_FULL; } +static int xpcs_get_max_xlgmii_speed(struct mdio_xpcs_args *xpcs, + struct phylink_link_state *state) +{ + unsigned long *adv = state->advertising; + int speed = SPEED_UNKNOWN; + int bit; + + for_each_set_bit(bit, adv, __ETHTOOL_LINK_MODE_MASK_NBITS) { + int new_speed = SPEED_UNKNOWN; + + switch (bit) { + case ETHTOOL_LINK_MODE_25000baseCR_Full_BIT: + case ETHTOOL_LINK_MODE_25000baseKR_Full_BIT: + case ETHTOOL_LINK_MODE_25000baseSR_Full_BIT: + new_speed = SPEED_25000; + break; + case ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT: + case ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT: + case ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT: + case ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT: + new_speed = SPEED_40000; + break; + case ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseKR_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseSR_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseCR_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseDR_Full_BIT: + new_speed = SPEED_50000; + break; + case ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT: + new_speed = SPEED_100000; + break; + default: + continue; + } + + if (new_speed > speed) + speed = new_speed; + } + + return speed; +} + static void xpcs_resolve_pma(struct mdio_xpcs_args *xpcs, struct phylink_link_state *state) { @@ -468,6 +567,9 @@ static void xpcs_resolve_pma(struct mdio_xpcs_args *xpcs, case PHY_INTERFACE_MODE_10GKR: state->speed = SPEED_10000; break; + case PHY_INTERFACE_MODE_XLGMII: + state->speed = xpcs_get_max_xlgmii_speed(xpcs, state); + break; default: state->speed = SPEED_UNKNOWN; break; @@ -517,10 +619,12 @@ static int xpcs_get_state(struct mdio_xpcs_args *xpcs, return xpcs_config(xpcs, state); } - if (state->link && state->an_enabled && xpcs_aneg_done(xpcs, state)) { + if (state->an_enabled && xpcs_aneg_done(xpcs, state)) { state->an_complete = true; xpcs_read_lpa(xpcs, state); xpcs_resolve_lpa(xpcs, state); + } else if (state->an_enabled) { + state->link = 0; } else if (state->link) { xpcs_resolve_pma(xpcs, state); } @@ -588,7 +692,7 @@ static int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface) match = entry; if (xpcs_check_features(xpcs, match, interface)) - return 0; + return xpcs_soft_reset(xpcs, MDIO_MMD_PCS); } } diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 3ab9ca7614d1..522760c8bca6 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -825,6 +825,38 @@ int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) EXPORT_SYMBOL(__mdiobus_write); /** + * __mdiobus_modify_changed - Unlocked version of the mdiobus_modify function + * @bus: the mii_bus struct + * @addr: the phy address + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + * + * Read, modify, and if any change, write the register value back to the + * device. Any error returns a negative number. + * + * NOTE: MUST NOT be called from interrupt context. + */ +int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, + u16 mask, u16 set) +{ + int new, ret; + + ret = __mdiobus_read(bus, addr, regnum); + if (ret < 0) + return ret; + + new = (ret & ~mask) | set; + if (new == ret) + return 0; + + ret = __mdiobus_write(bus, addr, regnum, new); + + return ret < 0 ? ret : 1; +} +EXPORT_SYMBOL_GPL(__mdiobus_modify_changed); + +/** * mdiobus_read_nested - Nested version of the mdiobus_read function * @bus: the mii_bus struct * @addr: the phy address @@ -841,7 +873,8 @@ int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum) { int retval; - BUG_ON(in_interrupt()); + if (WARN_ON_ONCE(in_interrupt())) + return -EINVAL; mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); retval = __mdiobus_read(bus, addr, regnum); @@ -865,7 +898,8 @@ int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) { int retval; - BUG_ON(in_interrupt()); + if (WARN_ON_ONCE(in_interrupt())) + return -EINVAL; mutex_lock(&bus->mdio_lock); retval = __mdiobus_read(bus, addr, regnum); @@ -893,7 +927,8 @@ int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val) { int err; - BUG_ON(in_interrupt()); + if (WARN_ON_ONCE(in_interrupt())) + return -EINVAL; mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); err = __mdiobus_write(bus, addr, regnum, val); @@ -918,7 +953,8 @@ int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) { int err; - BUG_ON(in_interrupt()); + if (WARN_ON_ONCE(in_interrupt())) + return -EINVAL; mutex_lock(&bus->mdio_lock); err = __mdiobus_write(bus, addr, regnum, val); @@ -929,6 +965,30 @@ int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) EXPORT_SYMBOL(mdiobus_write); /** + * mdiobus_modify - Convenience function for modifying a given mdio device + * register + * @bus: the mii_bus struct + * @addr: the phy address + * @regnum: register number to write + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + */ +int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set) +{ + int err; + + if (WARN_ON_ONCE(in_interrupt())) + return -EINVAL; + + mutex_lock(&bus->mdio_lock); + err = __mdiobus_modify_changed(bus, addr, regnum, mask, set); + mutex_unlock(&bus->mdio_lock); + + return err < 0 ? err : 0; +} +EXPORT_SYMBOL_GPL(mdiobus_modify); + +/** * mdio_bus_match - determine if given MDIO driver supports the given * MDIO device * @dev: target MDIO device diff --git a/drivers/net/phy/mscc/Makefile b/drivers/net/phy/mscc/Makefile new file mode 100644 index 000000000000..10af42cd9839 --- /dev/null +++ b/drivers/net/phy/mscc/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for MSCC networking PHY driver + +obj-$(CONFIG_MICROSEMI_PHY) := mscc.o +mscc-objs := mscc_main.o + +ifdef CONFIG_MACSEC +mscc-objs += mscc_macsec.o +endif diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h new file mode 100644 index 000000000000..d983d3af66d6 --- /dev/null +++ b/drivers/net/phy/mscc/mscc.h @@ -0,0 +1,410 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * Driver for Microsemi VSC85xx PHYs + * + * Copyright (c) 2016 Microsemi Corporation + */ + +#ifndef _MSCC_PHY_H_ +#define _MSCC_PHY_H_ + +#if IS_ENABLED(CONFIG_MACSEC) +#include "mscc_macsec.h" +#endif + +enum rgmii_clock_delay { + RGMII_CLK_DELAY_0_2_NS = 0, + RGMII_CLK_DELAY_0_8_NS = 1, + RGMII_CLK_DELAY_1_1_NS = 2, + RGMII_CLK_DELAY_1_7_NS = 3, + RGMII_CLK_DELAY_2_0_NS = 4, + RGMII_CLK_DELAY_2_3_NS = 5, + RGMII_CLK_DELAY_2_6_NS = 6, + RGMII_CLK_DELAY_3_4_NS = 7 +}; + +/* Microsemi VSC85xx PHY registers */ +/* IEEE 802. Std Registers */ +#define MSCC_PHY_BYPASS_CONTROL 18 +#define DISABLE_HP_AUTO_MDIX_MASK 0x0080 +#define DISABLE_PAIR_SWAP_CORR_MASK 0x0020 +#define DISABLE_POLARITY_CORR_MASK 0x0010 +#define PARALLEL_DET_IGNORE_ADVERTISED 0x0008 + +#define MSCC_PHY_EXT_CNTL_STATUS 22 +#define SMI_BROADCAST_WR_EN 0x0001 + +#define MSCC_PHY_ERR_RX_CNT 19 +#define MSCC_PHY_ERR_FALSE_CARRIER_CNT 20 +#define MSCC_PHY_ERR_LINK_DISCONNECT_CNT 21 +#define ERR_CNT_MASK GENMASK(7, 0) + +#define MSCC_PHY_EXT_PHY_CNTL_1 23 +#define MAC_IF_SELECTION_MASK 0x1800 +#define MAC_IF_SELECTION_GMII 0 +#define MAC_IF_SELECTION_RMII 1 +#define MAC_IF_SELECTION_RGMII 2 +#define MAC_IF_SELECTION_POS 11 +#define VSC8584_MAC_IF_SELECTION_MASK 0x1000 +#define VSC8584_MAC_IF_SELECTION_SGMII 0 +#define VSC8584_MAC_IF_SELECTION_1000BASEX 1 +#define VSC8584_MAC_IF_SELECTION_POS 12 +#define FAR_END_LOOPBACK_MODE_MASK 0x0008 +#define MEDIA_OP_MODE_MASK 0x0700 +#define MEDIA_OP_MODE_COPPER 0 +#define MEDIA_OP_MODE_SERDES 1 +#define MEDIA_OP_MODE_1000BASEX 2 +#define MEDIA_OP_MODE_100BASEFX 3 +#define MEDIA_OP_MODE_AMS_COPPER_SERDES 5 +#define MEDIA_OP_MODE_AMS_COPPER_1000BASEX 6 +#define MEDIA_OP_MODE_AMS_COPPER_100BASEFX 7 +#define MEDIA_OP_MODE_POS 8 + +#define MSCC_PHY_EXT_PHY_CNTL_2 24 + +#define MII_VSC85XX_INT_MASK 25 +#define MII_VSC85XX_INT_MASK_MDINT BIT(15) +#define MII_VSC85XX_INT_MASK_LINK_CHG BIT(13) +#define MII_VSC85XX_INT_MASK_WOL BIT(6) +#define MII_VSC85XX_INT_MASK_EXT BIT(5) +#define MII_VSC85XX_INT_STATUS 26 + +#define MII_VSC85XX_INT_MASK_MASK (MII_VSC85XX_INT_MASK_MDINT | \ + MII_VSC85XX_INT_MASK_LINK_CHG | \ + MII_VSC85XX_INT_MASK_EXT) + +#define MSCC_PHY_WOL_MAC_CONTROL 27 +#define EDGE_RATE_CNTL_POS 5 +#define EDGE_RATE_CNTL_MASK 0x00E0 + +#define MSCC_PHY_DEV_AUX_CNTL 28 +#define HP_AUTO_MDIX_X_OVER_IND_MASK 0x2000 + +#define MSCC_PHY_LED_MODE_SEL 29 +#define LED_MODE_SEL_POS(x) ((x) * 4) +#define LED_MODE_SEL_MASK(x) (GENMASK(3, 0) << LED_MODE_SEL_POS(x)) +#define LED_MODE_SEL(x, mode) (((mode) << LED_MODE_SEL_POS(x)) & LED_MODE_SEL_MASK(x)) + +#define MSCC_EXT_PAGE_CSR_CNTL_17 17 +#define MSCC_EXT_PAGE_CSR_CNTL_18 18 + +#define MSCC_EXT_PAGE_CSR_CNTL_19 19 +#define MSCC_PHY_CSR_CNTL_19_REG_ADDR(x) (x) +#define MSCC_PHY_CSR_CNTL_19_TARGET(x) ((x) << 12) +#define MSCC_PHY_CSR_CNTL_19_READ BIT(14) +#define MSCC_PHY_CSR_CNTL_19_CMD BIT(15) + +#define MSCC_EXT_PAGE_CSR_CNTL_20 20 +#define MSCC_PHY_CSR_CNTL_20_TARGET(x) (x) + +#define PHY_MCB_TARGET 0x07 +#define PHY_MCB_S6G_WRITE BIT(31) +#define PHY_MCB_S6G_READ BIT(30) + +#define PHY_S6G_PLL5G_CFG0 0x06 +#define PHY_S6G_LCPLL_CFG 0x11 +#define PHY_S6G_PLL_CFG 0x2b +#define PHY_S6G_COMMON_CFG 0x2c +#define PHY_S6G_GPC_CFG 0x2e +#define PHY_S6G_MISC_CFG 0x3b +#define PHY_MCB_S6G_CFG 0x3f +#define PHY_S6G_DFT_CFG2 0x3e +#define PHY_S6G_PLL_STATUS 0x31 +#define PHY_S6G_IB_STATUS0 0x2f + +#define PHY_S6G_SYS_RST_POS 31 +#define PHY_S6G_ENA_LANE_POS 18 +#define PHY_S6G_ENA_LOOP_POS 8 +#define PHY_S6G_QRATE_POS 6 +#define PHY_S6G_IF_MODE_POS 4 +#define PHY_S6G_PLL_ENA_OFFS_POS 21 +#define PHY_S6G_PLL_FSM_CTRL_DATA_POS 8 +#define PHY_S6G_PLL_FSM_ENA_POS 7 + +#define MSCC_EXT_PAGE_ACCESS 31 +#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ +#define MSCC_PHY_PAGE_EXTENDED 0x0001 /* Extended registers */ +#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */ +#define MSCC_PHY_PAGE_EXTENDED_3 0x0003 /* Extended reg - page 3 */ +#define MSCC_PHY_PAGE_EXTENDED_4 0x0004 /* Extended reg - page 4 */ +#define MSCC_PHY_PAGE_CSR_CNTL MSCC_PHY_PAGE_EXTENDED_4 +#define MSCC_PHY_PAGE_MACSEC MSCC_PHY_PAGE_EXTENDED_4 +/* Extended reg - GPIO; this is a bank of registers that are shared for all PHYs + * in the same package. + */ +#define MSCC_PHY_PAGE_EXTENDED_GPIO 0x0010 /* Extended reg - GPIO */ +#define MSCC_PHY_PAGE_TEST 0x2a30 /* Test reg */ +#define MSCC_PHY_PAGE_TR 0x52b5 /* Token ring registers */ + +/* Extended Page 1 Registers */ +#define MSCC_PHY_CU_MEDIA_CRC_VALID_CNT 18 +#define VALID_CRC_CNT_CRC_MASK GENMASK(13, 0) + +#define MSCC_PHY_EXT_MODE_CNTL 19 +#define FORCE_MDI_CROSSOVER_MASK 0x000C +#define FORCE_MDI_CROSSOVER_MDIX 0x000C +#define FORCE_MDI_CROSSOVER_MDI 0x0008 + +#define MSCC_PHY_ACTIPHY_CNTL 20 +#define PHY_ADDR_REVERSED 0x0200 +#define DOWNSHIFT_CNTL_MASK 0x001C +#define DOWNSHIFT_EN 0x0010 +#define DOWNSHIFT_CNTL_POS 2 + +#define MSCC_PHY_EXT_PHY_CNTL_4 23 +#define PHY_CNTL_4_ADDR_POS 11 + +#define MSCC_PHY_VERIPHY_CNTL_2 25 + +#define MSCC_PHY_VERIPHY_CNTL_3 26 + +/* Extended Page 2 Registers */ +#define MSCC_PHY_CU_PMD_TX_CNTL 16 + +#define MSCC_PHY_RGMII_SETTINGS 18 +#define RGMII_SKEW_RX_POS 1 +#define RGMII_SKEW_TX_POS 4 + +/* RGMII skew values, in ns */ +#define VSC8584_RGMII_SKEW_0_2 0 +#define VSC8584_RGMII_SKEW_0_8 1 +#define VSC8584_RGMII_SKEW_1_1 2 +#define VSC8584_RGMII_SKEW_1_7 3 +#define VSC8584_RGMII_SKEW_2_0 4 +#define VSC8584_RGMII_SKEW_2_3 5 +#define VSC8584_RGMII_SKEW_2_6 6 +#define VSC8584_RGMII_SKEW_3_4 7 + +#define MSCC_PHY_RGMII_CNTL 20 +#define RGMII_RX_CLK_DELAY_MASK 0x0070 +#define RGMII_RX_CLK_DELAY_POS 4 +#define RGMII_TX_CLK_DELAY_MASK 0x0007 +#define RGMII_TX_CLK_DELAY_POS 0 + +#define MSCC_PHY_WOL_LOWER_MAC_ADDR 21 +#define MSCC_PHY_WOL_MID_MAC_ADDR 22 +#define MSCC_PHY_WOL_UPPER_MAC_ADDR 23 +#define MSCC_PHY_WOL_LOWER_PASSWD 24 +#define MSCC_PHY_WOL_MID_PASSWD 25 +#define MSCC_PHY_WOL_UPPER_PASSWD 26 + +#define MSCC_PHY_WOL_MAC_CONTROL 27 +#define SECURE_ON_ENABLE 0x8000 +#define SECURE_ON_PASSWD_LEN_4 0x4000 + +#define MSCC_PHY_EXTENDED_INT 28 +#define MSCC_PHY_EXTENDED_INT_MS_EGR BIT(9) + +/* Extended Page 3 Registers */ +#define MSCC_PHY_SERDES_TX_VALID_CNT 21 +#define MSCC_PHY_SERDES_TX_CRC_ERR_CNT 22 +#define MSCC_PHY_SERDES_RX_VALID_CNT 28 +#define MSCC_PHY_SERDES_RX_CRC_ERR_CNT 29 + +/* Extended page GPIO Registers */ +#define MSCC_DW8051_CNTL_STATUS 0 +#define MICRO_NSOFT_RESET 0x8000 +#define RUN_FROM_INT_ROM 0x4000 +#define AUTOINC_ADDR 0x2000 +#define PATCH_RAM_CLK 0x1000 +#define MICRO_PATCH_EN 0x0080 +#define DW8051_CLK_EN 0x0010 +#define MICRO_CLK_EN 0x0008 +#define MICRO_CLK_DIVIDE(x) ((x) >> 1) +#define MSCC_DW8051_VLD_MASK 0xf1ff + +/* x Address in range 1-4 */ +#define MSCC_TRAP_ROM_ADDR(x) ((x) * 2 + 1) +#define MSCC_PATCH_RAM_ADDR(x) (((x) + 1) * 2) +#define MSCC_INT_MEM_ADDR 11 + +#define MSCC_INT_MEM_CNTL 12 +#define READ_SFR 0x6000 +#define READ_PRAM 0x4000 +#define READ_ROM 0x2000 +#define READ_RAM 0x0000 +#define INT_MEM_WRITE_EN 0x1000 +#define EN_PATCH_RAM_TRAP_ADDR(x) (0x0100 << ((x) - 1)) +#define INT_MEM_DATA_M 0x00ff +#define INT_MEM_DATA(x) (INT_MEM_DATA_M & (x)) + +#define MSCC_PHY_PROC_CMD 18 +#define PROC_CMD_NCOMPLETED 0x8000 +#define PROC_CMD_FAILED 0x4000 +#define PROC_CMD_SGMII_PORT(x) ((x) << 8) +#define PROC_CMD_FIBER_PORT(x) (0x0100 << (x) % 4) +#define PROC_CMD_QSGMII_PORT 0x0c00 +#define PROC_CMD_RST_CONF_PORT 0x0080 +#define PROC_CMD_RECONF_PORT 0x0000 +#define PROC_CMD_READ_MOD_WRITE_PORT 0x0040 +#define PROC_CMD_WRITE 0x0040 +#define PROC_CMD_READ 0x0000 +#define PROC_CMD_FIBER_DISABLE 0x0020 +#define PROC_CMD_FIBER_100BASE_FX 0x0010 +#define PROC_CMD_FIBER_1000BASE_X 0x0000 +#define PROC_CMD_SGMII_MAC 0x0030 +#define PROC_CMD_QSGMII_MAC 0x0020 +#define PROC_CMD_NO_MAC_CONF 0x0000 +#define PROC_CMD_1588_DEFAULT_INIT 0x0010 +#define PROC_CMD_NOP 0x000f +#define PROC_CMD_PHY_INIT 0x000a +#define PROC_CMD_CRC16 0x0008 +#define PROC_CMD_FIBER_MEDIA_CONF 0x0001 +#define PROC_CMD_MCB_ACCESS_MAC_CONF 0x0000 +#define PROC_CMD_NCOMPLETED_TIMEOUT_MS 500 + +#define MSCC_PHY_MAC_CFG_FASTLINK 19 +#define MAC_CFG_MASK 0xc000 +#define MAC_CFG_SGMII 0x0000 +#define MAC_CFG_QSGMII 0x4000 +#define MAC_CFG_RGMII 0x8000 + +/* Test page Registers */ +#define MSCC_PHY_TEST_PAGE_5 5 +#define MSCC_PHY_TEST_PAGE_8 8 +#define MSCC_PHY_TEST_PAGE_9 9 +#define MSCC_PHY_TEST_PAGE_20 20 +#define MSCC_PHY_TEST_PAGE_24 24 + +/* Token ring page Registers */ +#define MSCC_PHY_TR_CNTL 16 +#define TR_WRITE 0x8000 +#define TR_ADDR(x) (0x7fff & (x)) +#define MSCC_PHY_TR_LSB 17 +#define MSCC_PHY_TR_MSB 18 + +/* Microsemi PHY ID's + * Code assumes lowest nibble is 0 + */ +#define PHY_ID_VSC8502 0x00070630 +#define PHY_ID_VSC8504 0x000704c0 +#define PHY_ID_VSC8514 0x00070670 +#define PHY_ID_VSC8530 0x00070560 +#define PHY_ID_VSC8531 0x00070570 +#define PHY_ID_VSC8540 0x00070760 +#define PHY_ID_VSC8541 0x00070770 +#define PHY_ID_VSC8552 0x000704e0 +#define PHY_ID_VSC856X 0x000707e0 +#define PHY_ID_VSC8572 0x000704d0 +#define PHY_ID_VSC8574 0x000704a0 +#define PHY_ID_VSC8575 0x000707d0 +#define PHY_ID_VSC8582 0x000707b0 +#define PHY_ID_VSC8584 0x000707c0 + +#define MSCC_VDDMAC_1500 1500 +#define MSCC_VDDMAC_1800 1800 +#define MSCC_VDDMAC_2500 2500 +#define MSCC_VDDMAC_3300 3300 + +#define DOWNSHIFT_COUNT_MAX 5 + +#define MAX_LEDS 4 + +#define VSC8584_SUPP_LED_MODES (BIT(VSC8531_LINK_ACTIVITY) | \ + BIT(VSC8531_LINK_1000_ACTIVITY) | \ + BIT(VSC8531_LINK_100_ACTIVITY) | \ + BIT(VSC8531_LINK_10_ACTIVITY) | \ + BIT(VSC8531_LINK_100_1000_ACTIVITY) | \ + BIT(VSC8531_LINK_10_1000_ACTIVITY) | \ + BIT(VSC8531_LINK_10_100_ACTIVITY) | \ + BIT(VSC8584_LINK_100FX_1000X_ACTIVITY) | \ + BIT(VSC8531_DUPLEX_COLLISION) | \ + BIT(VSC8531_COLLISION) | \ + BIT(VSC8531_ACTIVITY) | \ + BIT(VSC8584_100FX_1000X_ACTIVITY) | \ + BIT(VSC8531_AUTONEG_FAULT) | \ + BIT(VSC8531_SERIAL_MODE) | \ + BIT(VSC8531_FORCE_LED_OFF) | \ + BIT(VSC8531_FORCE_LED_ON)) + +#define VSC85XX_SUPP_LED_MODES (BIT(VSC8531_LINK_ACTIVITY) | \ + BIT(VSC8531_LINK_1000_ACTIVITY) | \ + BIT(VSC8531_LINK_100_ACTIVITY) | \ + BIT(VSC8531_LINK_10_ACTIVITY) | \ + BIT(VSC8531_LINK_100_1000_ACTIVITY) | \ + BIT(VSC8531_LINK_10_1000_ACTIVITY) | \ + BIT(VSC8531_LINK_10_100_ACTIVITY) | \ + BIT(VSC8531_DUPLEX_COLLISION) | \ + BIT(VSC8531_COLLISION) | \ + BIT(VSC8531_ACTIVITY) | \ + BIT(VSC8531_AUTONEG_FAULT) | \ + BIT(VSC8531_SERIAL_MODE) | \ + BIT(VSC8531_FORCE_LED_OFF) | \ + BIT(VSC8531_FORCE_LED_ON)) + +#define MSCC_VSC8584_REVB_INT8051_FW "microchip/mscc_vsc8584_revb_int8051_fb48.bin" +#define MSCC_VSC8584_REVB_INT8051_FW_START_ADDR 0xe800 +#define MSCC_VSC8584_REVB_INT8051_FW_CRC 0xfb48 + +#define MSCC_VSC8574_REVB_INT8051_FW "microchip/mscc_vsc8574_revb_int8051_29e8.bin" +#define MSCC_VSC8574_REVB_INT8051_FW_START_ADDR 0x4000 +#define MSCC_VSC8574_REVB_INT8051_FW_CRC 0x29e8 + +#define VSC8584_REVB 0x0001 +#define MSCC_DEV_REV_MASK GENMASK(3, 0) + +struct reg_val { + u16 reg; + u32 val; +}; + +struct vsc85xx_hw_stat { + const char *string; + u8 reg; + u16 page; + u16 mask; +}; + +struct vsc8531_private { + int rate_magic; + u16 supp_led_modes; + u32 leds_mode[MAX_LEDS]; + u8 nleds; + const struct vsc85xx_hw_stat *hw_stats; + u64 *stats; + int nstats; + bool pkg_init; + /* For multiple port PHYs; the MDIO address of the base PHY in the + * package. + */ + unsigned int base_addr; + +#if IS_ENABLED(CONFIG_MACSEC) + /* MACsec fields: + * - One SecY per device (enforced at the s/w implementation level) + * - macsec_flows: list of h/w flows + * - ingr_flows: bitmap of ingress flows + * - egr_flows: bitmap of egress flows + */ + struct macsec_secy *secy; + struct list_head macsec_flows; + unsigned long ingr_flows; + unsigned long egr_flows; +#endif +}; + +#ifdef CONFIG_OF_MDIO +struct vsc8531_edge_rate_table { + u32 vddmac; + u32 slowdown[8]; +}; +#endif /* CONFIG_OF_MDIO */ + +#if IS_ENABLED(CONFIG_MACSEC) +int vsc8584_macsec_init(struct phy_device *phydev); +void vsc8584_handle_macsec_interrupt(struct phy_device *phydev); +void vsc8584_config_macsec_intr(struct phy_device *phydev); +#else +static inline int vsc8584_macsec_init(struct phy_device *phydev) +{ + return 0; +} +static inline void vsc8584_handle_macsec_interrupt(struct phy_device *phydev) +{ +} +static inline void vsc8584_config_macsec_intr(struct phy_device *phydev) +{ +} +#endif + +#endif /* _MSCC_PHY_H_ */ diff --git a/drivers/net/phy/mscc_fc_buffer.h b/drivers/net/phy/mscc/mscc_fc_buffer.h index 7e9c0e877895..3803e826c37d 100644 --- a/drivers/net/phy/mscc_fc_buffer.h +++ b/drivers/net/phy/mscc/mscc_fc_buffer.h @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* - * Microsemi Ocelot Switch driver + * Driver for Microsemi VSC85xx PHYs * * Copyright (C) 2019 Microsemi Corporation */ -#ifndef _MSCC_OCELOT_FC_BUFFER_H_ -#define _MSCC_OCELOT_FC_BUFFER_H_ +#ifndef _MSCC_PHY_FC_BUFFER_H_ +#define _MSCC_PHY_FC_BUFFER_H_ #define MSCC_FCBUF_ENA_CFG 0x00 #define MSCC_FCBUF_MODE_CFG 0x01 @@ -61,4 +61,4 @@ #define MSCC_FCBUF_FC_READ_THRESH_CFG_RX_THRESH(x) ((x) << 16) #define MSCC_FCBUF_FC_READ_THRESH_CFG_RX_THRESH_M GENMASK(31, 16) -#endif +#endif /* _MSCC_PHY_FC_BUFFER_H_ */ diff --git a/drivers/net/phy/mscc_mac.h b/drivers/net/phy/mscc/mscc_mac.h index 9420ee5175a6..fcb5ba5e5d03 100644 --- a/drivers/net/phy/mscc_mac.h +++ b/drivers/net/phy/mscc/mscc_mac.h @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* - * Microsemi Ocelot Switch driver + * Driver for Microsemi VSC85xx PHYs * * Copyright (c) 2017 Microsemi Corporation */ -#ifndef _MSCC_OCELOT_LINE_MAC_H_ -#define _MSCC_OCELOT_LINE_MAC_H_ +#ifndef _MSCC_PHY_LINE_MAC_H_ +#define _MSCC_PHY_LINE_MAC_H_ #define MSCC_MAC_CFG_ENA_CFG 0x00 #define MSCC_MAC_CFG_MODE_CFG 0x01 @@ -156,4 +156,4 @@ #define MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE(x) (x) #define MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE_M GENMASK(2, 0) -#endif /* _MSCC_OCELOT_LINE_MAC_H_ */ +#endif /* _MSCC_PHY_LINE_MAC_H_ */ diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c new file mode 100644 index 000000000000..e99e2cd72a0c --- /dev/null +++ b/drivers/net/phy/mscc/mscc_macsec.c @@ -0,0 +1,1051 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* + * Driver for Microsemi VSC85xx PHYs + * + * Author: Nagaraju Lakkaraju + * License: Dual MIT/GPL + * Copyright (c) 2016 Microsemi Corporation + */ + +#include <linux/phy.h> +#include <dt-bindings/net/mscc-phy-vsc8531.h> + +#include <crypto/skcipher.h> + +#include <net/macsec.h> + +#include "mscc.h" +#include "mscc_mac.h" +#include "mscc_macsec.h" +#include "mscc_fc_buffer.h" + +static u32 vsc8584_macsec_phy_read(struct phy_device *phydev, + enum macsec_bank bank, u32 reg) +{ + u32 val, val_l = 0, val_h = 0; + unsigned long deadline; + int rc; + + rc = phy_select_page(phydev, MSCC_PHY_PAGE_MACSEC); + if (rc < 0) + goto failed; + + __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_20, + MSCC_PHY_MACSEC_20_TARGET(bank >> 2)); + + if (bank >> 2 == 0x1) + /* non-MACsec access */ + bank &= 0x3; + else + bank = 0; + + __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_19, + MSCC_PHY_MACSEC_19_CMD | MSCC_PHY_MACSEC_19_READ | + MSCC_PHY_MACSEC_19_REG_ADDR(reg) | + MSCC_PHY_MACSEC_19_TARGET(bank)); + + deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); + do { + val = __phy_read(phydev, MSCC_EXT_PAGE_MACSEC_19); + } while (time_before(jiffies, deadline) && !(val & MSCC_PHY_MACSEC_19_CMD)); + + val_l = __phy_read(phydev, MSCC_EXT_PAGE_MACSEC_17); + val_h = __phy_read(phydev, MSCC_EXT_PAGE_MACSEC_18); + +failed: + phy_restore_page(phydev, rc, rc); + + return (val_h << 16) | val_l; +} + +static void vsc8584_macsec_phy_write(struct phy_device *phydev, + enum macsec_bank bank, u32 reg, u32 val) +{ + unsigned long deadline; + int rc; + + rc = phy_select_page(phydev, MSCC_PHY_PAGE_MACSEC); + if (rc < 0) + goto failed; + + __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_20, + MSCC_PHY_MACSEC_20_TARGET(bank >> 2)); + + if ((bank >> 2 == 0x1) || (bank >> 2 == 0x3)) + bank &= 0x3; + else + /* MACsec access */ + bank = 0; + + __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_17, (u16)val); + __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_18, (u16)(val >> 16)); + + __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_19, + MSCC_PHY_MACSEC_19_CMD | MSCC_PHY_MACSEC_19_REG_ADDR(reg) | + MSCC_PHY_MACSEC_19_TARGET(bank)); + + deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); + do { + val = __phy_read(phydev, MSCC_EXT_PAGE_MACSEC_19); + } while (time_before(jiffies, deadline) && !(val & MSCC_PHY_MACSEC_19_CMD)); + +failed: + phy_restore_page(phydev, rc, rc); +} + +static void vsc8584_macsec_classification(struct phy_device *phydev, + enum macsec_bank bank) +{ + /* enable VLAN tag parsing */ + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_CP_TAG, + MSCC_MS_SAM_CP_TAG_PARSE_STAG | + MSCC_MS_SAM_CP_TAG_PARSE_QTAG | + MSCC_MS_SAM_CP_TAG_PARSE_QINQ); +} + +static void vsc8584_macsec_flow_default_action(struct phy_device *phydev, + enum macsec_bank bank, + bool block) +{ + u32 port = (bank == MACSEC_INGR) ? + MSCC_MS_PORT_UNCONTROLLED : MSCC_MS_PORT_COMMON; + u32 action = MSCC_MS_FLOW_BYPASS; + + if (block) + action = MSCC_MS_FLOW_DROP; + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_NM_FLOW_NCP, + /* MACsec untagged */ + MSCC_MS_SAM_NM_FLOW_NCP_UNTAGGED_FLOW_TYPE(action) | + MSCC_MS_SAM_NM_FLOW_NCP_UNTAGGED_DROP_ACTION(MSCC_MS_ACTION_DROP) | + MSCC_MS_SAM_NM_FLOW_NCP_UNTAGGED_DEST_PORT(port) | + /* MACsec tagged */ + MSCC_MS_SAM_NM_FLOW_NCP_TAGGED_FLOW_TYPE(action) | + MSCC_MS_SAM_NM_FLOW_NCP_TAGGED_DROP_ACTION(MSCC_MS_ACTION_DROP) | + MSCC_MS_SAM_NM_FLOW_NCP_TAGGED_DEST_PORT(port) | + /* Bad tag */ + MSCC_MS_SAM_NM_FLOW_NCP_BADTAG_FLOW_TYPE(action) | + MSCC_MS_SAM_NM_FLOW_NCP_BADTAG_DROP_ACTION(MSCC_MS_ACTION_DROP) | + MSCC_MS_SAM_NM_FLOW_NCP_BADTAG_DEST_PORT(port) | + /* Kay tag */ + MSCC_MS_SAM_NM_FLOW_NCP_KAY_FLOW_TYPE(action) | + MSCC_MS_SAM_NM_FLOW_NCP_KAY_DROP_ACTION(MSCC_MS_ACTION_DROP) | + MSCC_MS_SAM_NM_FLOW_NCP_KAY_DEST_PORT(port)); + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_NM_FLOW_CP, + /* MACsec untagged */ + MSCC_MS_SAM_NM_FLOW_NCP_UNTAGGED_FLOW_TYPE(action) | + MSCC_MS_SAM_NM_FLOW_CP_UNTAGGED_DROP_ACTION(MSCC_MS_ACTION_DROP) | + MSCC_MS_SAM_NM_FLOW_CP_UNTAGGED_DEST_PORT(port) | + /* MACsec tagged */ + MSCC_MS_SAM_NM_FLOW_NCP_TAGGED_FLOW_TYPE(action) | + MSCC_MS_SAM_NM_FLOW_CP_TAGGED_DROP_ACTION(MSCC_MS_ACTION_DROP) | + MSCC_MS_SAM_NM_FLOW_CP_TAGGED_DEST_PORT(port) | + /* Bad tag */ + MSCC_MS_SAM_NM_FLOW_NCP_BADTAG_FLOW_TYPE(action) | + MSCC_MS_SAM_NM_FLOW_CP_BADTAG_DROP_ACTION(MSCC_MS_ACTION_DROP) | + MSCC_MS_SAM_NM_FLOW_CP_BADTAG_DEST_PORT(port) | + /* Kay tag */ + MSCC_MS_SAM_NM_FLOW_NCP_KAY_FLOW_TYPE(action) | + MSCC_MS_SAM_NM_FLOW_CP_KAY_DROP_ACTION(MSCC_MS_ACTION_DROP) | + MSCC_MS_SAM_NM_FLOW_CP_KAY_DEST_PORT(port)); +} + +static void vsc8584_macsec_integrity_checks(struct phy_device *phydev, + enum macsec_bank bank) +{ + u32 val; + + if (bank != MACSEC_INGR) + return; + + /* Set default rules to pass unmatched frames */ + val = vsc8584_macsec_phy_read(phydev, bank, + MSCC_MS_PARAMS2_IG_CC_CONTROL); + val |= MSCC_MS_PARAMS2_IG_CC_CONTROL_NON_MATCH_CTRL_ACT | + MSCC_MS_PARAMS2_IG_CC_CONTROL_NON_MATCH_ACT; + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_PARAMS2_IG_CC_CONTROL, + val); + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_PARAMS2_IG_CP_TAG, + MSCC_MS_PARAMS2_IG_CP_TAG_PARSE_STAG | + MSCC_MS_PARAMS2_IG_CP_TAG_PARSE_QTAG | + MSCC_MS_PARAMS2_IG_CP_TAG_PARSE_QINQ); +} + +static void vsc8584_macsec_block_init(struct phy_device *phydev, + enum macsec_bank bank) +{ + u32 val; + int i; + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_ENA_CFG, + MSCC_MS_ENA_CFG_SW_RST | + MSCC_MS_ENA_CFG_MACSEC_BYPASS_ENA); + + /* Set the MACsec block out of s/w reset and enable clocks */ + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_ENA_CFG, + MSCC_MS_ENA_CFG_CLK_ENA); + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_STATUS_CONTEXT_CTRL, + bank == MACSEC_INGR ? 0xe5880214 : 0xe5880218); + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_MISC_CONTROL, + MSCC_MS_MISC_CONTROL_MC_LATENCY_FIX(bank == MACSEC_INGR ? 57 : 40) | + MSCC_MS_MISC_CONTROL_XFORM_REC_SIZE(bank == MACSEC_INGR ? 1 : 2)); + + /* Clear the counters */ + val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MS_COUNT_CONTROL); + val |= MSCC_MS_COUNT_CONTROL_AUTO_CNTR_RESET; + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_COUNT_CONTROL, val); + + /* Enable octet increment mode */ + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_PP_CTRL, + MSCC_MS_PP_CTRL_MACSEC_OCTET_INCR_MODE); + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_BLOCK_CTX_UPDATE, 0x3); + + val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MS_COUNT_CONTROL); + val |= MSCC_MS_COUNT_CONTROL_RESET_ALL; + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_COUNT_CONTROL, val); + + /* Set the MTU */ + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_NON_VLAN_MTU_CHECK, + MSCC_MS_NON_VLAN_MTU_CHECK_NV_MTU_COMPARE(32761) | + MSCC_MS_NON_VLAN_MTU_CHECK_NV_MTU_COMP_DROP); + + for (i = 0; i < 8; i++) + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_VLAN_MTU_CHECK(i), + MSCC_MS_VLAN_MTU_CHECK_MTU_COMPARE(32761) | + MSCC_MS_VLAN_MTU_CHECK_MTU_COMP_DROP); + + if (bank == MACSEC_EGR) { + val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MS_INTR_CTRL_STATUS); + val &= ~MSCC_MS_INTR_CTRL_STATUS_INTR_ENABLE_M; + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_INTR_CTRL_STATUS, val); + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_FC_CFG, + MSCC_MS_FC_CFG_FCBUF_ENA | + MSCC_MS_FC_CFG_LOW_THRESH(0x1) | + MSCC_MS_FC_CFG_HIGH_THRESH(0x4) | + MSCC_MS_FC_CFG_LOW_BYTES_VAL(0x4) | + MSCC_MS_FC_CFG_HIGH_BYTES_VAL(0x6)); + } + + vsc8584_macsec_classification(phydev, bank); + vsc8584_macsec_flow_default_action(phydev, bank, false); + vsc8584_macsec_integrity_checks(phydev, bank); + + /* Enable the MACsec block */ + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_ENA_CFG, + MSCC_MS_ENA_CFG_CLK_ENA | + MSCC_MS_ENA_CFG_MACSEC_ENA | + MSCC_MS_ENA_CFG_MACSEC_SPEED_MODE(0x5)); +} + +static void vsc8584_macsec_mac_init(struct phy_device *phydev, + enum macsec_bank bank) +{ + u32 val; + int i; + + /* Clear host & line stats */ + for (i = 0; i < 36; i++) + vsc8584_macsec_phy_write(phydev, bank, 0x1c + i, 0); + + val = vsc8584_macsec_phy_read(phydev, bank, + MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL); + val &= ~MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL_PAUSE_MODE_M; + val |= MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL_PAUSE_MODE(2) | + MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL_PAUSE_VALUE(0xffff); + vsc8584_macsec_phy_write(phydev, bank, + MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL, val); + + val = vsc8584_macsec_phy_read(phydev, bank, + MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL_2); + val |= 0xffff; + vsc8584_macsec_phy_write(phydev, bank, + MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL_2, val); + + val = vsc8584_macsec_phy_read(phydev, bank, + MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL); + if (bank == HOST_MAC) + val |= MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_PAUSE_TIMER_ENA | + MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_PAUSE_FRAME_DROP_ENA; + else + val |= MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_PAUSE_REACT_ENA | + MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_PAUSE_FRAME_DROP_ENA | + MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_PAUSE_MODE | + MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_EARLY_PAUSE_DETECT_ENA; + vsc8584_macsec_phy_write(phydev, bank, + MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL, val); + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_PKTINF_CFG, + MSCC_MAC_CFG_PKTINF_CFG_STRIP_FCS_ENA | + MSCC_MAC_CFG_PKTINF_CFG_INSERT_FCS_ENA | + MSCC_MAC_CFG_PKTINF_CFG_LPI_RELAY_ENA | + MSCC_MAC_CFG_PKTINF_CFG_STRIP_PREAMBLE_ENA | + MSCC_MAC_CFG_PKTINF_CFG_INSERT_PREAMBLE_ENA | + (bank == HOST_MAC ? + MSCC_MAC_CFG_PKTINF_CFG_ENABLE_TX_PADDING : 0)); + + val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MAC_CFG_MODE_CFG); + val &= ~MSCC_MAC_CFG_MODE_CFG_DISABLE_DIC; + vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_MODE_CFG, val); + + val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MAC_CFG_MAXLEN_CFG); + val &= ~MSCC_MAC_CFG_MAXLEN_CFG_MAX_LEN_M; + val |= MSCC_MAC_CFG_MAXLEN_CFG_MAX_LEN(10240); + vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_MAXLEN_CFG, val); + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_ADV_CHK_CFG, + MSCC_MAC_CFG_ADV_CHK_CFG_SFD_CHK_ENA | + MSCC_MAC_CFG_ADV_CHK_CFG_PRM_CHK_ENA | + MSCC_MAC_CFG_ADV_CHK_CFG_OOR_ERR_ENA | + MSCC_MAC_CFG_ADV_CHK_CFG_INR_ERR_ENA); + + val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MAC_CFG_LFS_CFG); + val &= ~MSCC_MAC_CFG_LFS_CFG_LFS_MODE_ENA; + vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_LFS_CFG, val); + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_ENA_CFG, + MSCC_MAC_CFG_ENA_CFG_RX_CLK_ENA | + MSCC_MAC_CFG_ENA_CFG_TX_CLK_ENA | + MSCC_MAC_CFG_ENA_CFG_RX_ENA | + MSCC_MAC_CFG_ENA_CFG_TX_ENA); +} + +/* Must be called with mdio_lock taken */ +static int __vsc8584_macsec_init(struct phy_device *phydev) +{ + u32 val; + + vsc8584_macsec_block_init(phydev, MACSEC_INGR); + vsc8584_macsec_block_init(phydev, MACSEC_EGR); + vsc8584_macsec_mac_init(phydev, HOST_MAC); + vsc8584_macsec_mac_init(phydev, LINE_MAC); + + vsc8584_macsec_phy_write(phydev, FC_BUFFER, + MSCC_FCBUF_FC_READ_THRESH_CFG, + MSCC_FCBUF_FC_READ_THRESH_CFG_TX_THRESH(4) | + MSCC_FCBUF_FC_READ_THRESH_CFG_RX_THRESH(5)); + + val = vsc8584_macsec_phy_read(phydev, FC_BUFFER, MSCC_FCBUF_MODE_CFG); + val |= MSCC_FCBUF_MODE_CFG_PAUSE_GEN_ENA | + MSCC_FCBUF_MODE_CFG_RX_PPM_RATE_ADAPT_ENA | + MSCC_FCBUF_MODE_CFG_TX_PPM_RATE_ADAPT_ENA; + vsc8584_macsec_phy_write(phydev, FC_BUFFER, MSCC_FCBUF_MODE_CFG, val); + + vsc8584_macsec_phy_write(phydev, FC_BUFFER, MSCC_FCBUF_PPM_RATE_ADAPT_THRESH_CFG, + MSCC_FCBUF_PPM_RATE_ADAPT_THRESH_CFG_TX_THRESH(8) | + MSCC_FCBUF_PPM_RATE_ADAPT_THRESH_CFG_TX_OFFSET(9)); + + val = vsc8584_macsec_phy_read(phydev, FC_BUFFER, + MSCC_FCBUF_TX_DATA_QUEUE_CFG); + val &= ~(MSCC_FCBUF_TX_DATA_QUEUE_CFG_START_M | + MSCC_FCBUF_TX_DATA_QUEUE_CFG_END_M); + val |= MSCC_FCBUF_TX_DATA_QUEUE_CFG_START(0) | + MSCC_FCBUF_TX_DATA_QUEUE_CFG_END(5119); + vsc8584_macsec_phy_write(phydev, FC_BUFFER, + MSCC_FCBUF_TX_DATA_QUEUE_CFG, val); + + val = vsc8584_macsec_phy_read(phydev, FC_BUFFER, MSCC_FCBUF_ENA_CFG); + val |= MSCC_FCBUF_ENA_CFG_TX_ENA | MSCC_FCBUF_ENA_CFG_RX_ENA; + vsc8584_macsec_phy_write(phydev, FC_BUFFER, MSCC_FCBUF_ENA_CFG, val); + + val = vsc8584_macsec_phy_read(phydev, IP_1588, + MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL); + val &= ~MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE_M; + val |= MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE(4); + vsc8584_macsec_phy_write(phydev, IP_1588, + MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL, val); + + return 0; +} + +static void vsc8584_macsec_flow(struct phy_device *phydev, + struct macsec_flow *flow) +{ + struct vsc8531_private *priv = phydev->priv; + enum macsec_bank bank = flow->bank; + u32 val, match = 0, mask = 0, action = 0, idx = flow->index; + + if (flow->match.tagged) + match |= MSCC_MS_SAM_MISC_MATCH_TAGGED; + if (flow->match.untagged) + match |= MSCC_MS_SAM_MISC_MATCH_UNTAGGED; + + if (bank == MACSEC_INGR && flow->assoc_num >= 0) { + match |= MSCC_MS_SAM_MISC_MATCH_AN(flow->assoc_num); + mask |= MSCC_MS_SAM_MASK_AN_MASK(0x3); + } + + if (bank == MACSEC_INGR && flow->match.sci && flow->rx_sa->sc->sci) { + match |= MSCC_MS_SAM_MISC_MATCH_TCI(BIT(3)); + mask |= MSCC_MS_SAM_MASK_TCI_MASK(BIT(3)) | + MSCC_MS_SAM_MASK_SCI_MASK; + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_MATCH_SCI_LO(idx), + lower_32_bits(flow->rx_sa->sc->sci)); + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_MATCH_SCI_HI(idx), + upper_32_bits(flow->rx_sa->sc->sci)); + } + + if (flow->match.etype) { + mask |= MSCC_MS_SAM_MASK_MAC_ETYPE_MASK; + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_MAC_SA_MATCH_HI(idx), + MSCC_MS_SAM_MAC_SA_MATCH_HI_ETYPE(htons(flow->etype))); + } + + match |= MSCC_MS_SAM_MISC_MATCH_PRIORITY(flow->priority); + + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_MISC_MATCH(idx), match); + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_MASK(idx), mask); + + /* Action for matching packets */ + if (flow->action.drop) + action = MSCC_MS_FLOW_DROP; + else if (flow->action.bypass || flow->port == MSCC_MS_PORT_UNCONTROLLED) + action = MSCC_MS_FLOW_BYPASS; + else + action = (bank == MACSEC_INGR) ? + MSCC_MS_FLOW_INGRESS : MSCC_MS_FLOW_EGRESS; + + val = MSCC_MS_SAM_FLOW_CTRL_FLOW_TYPE(action) | + MSCC_MS_SAM_FLOW_CTRL_DROP_ACTION(MSCC_MS_ACTION_DROP) | + MSCC_MS_SAM_FLOW_CTRL_DEST_PORT(flow->port); + + if (action == MSCC_MS_FLOW_BYPASS) + goto write_ctrl; + + if (bank == MACSEC_INGR) { + if (priv->secy->replay_protect) + val |= MSCC_MS_SAM_FLOW_CTRL_REPLAY_PROTECT; + if (priv->secy->validate_frames == MACSEC_VALIDATE_STRICT) + val |= MSCC_MS_SAM_FLOW_CTRL_VALIDATE_FRAMES(MSCC_MS_VALIDATE_STRICT); + else if (priv->secy->validate_frames == MACSEC_VALIDATE_CHECK) + val |= MSCC_MS_SAM_FLOW_CTRL_VALIDATE_FRAMES(MSCC_MS_VALIDATE_CHECK); + } else if (bank == MACSEC_EGR) { + if (priv->secy->protect_frames) + val |= MSCC_MS_SAM_FLOW_CTRL_PROTECT_FRAME; + if (priv->secy->tx_sc.encrypt) + val |= MSCC_MS_SAM_FLOW_CTRL_CONF_PROTECT; + if (priv->secy->tx_sc.send_sci) + val |= MSCC_MS_SAM_FLOW_CTRL_INCLUDE_SCI; + } + +write_ctrl: + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_FLOW_CTRL(idx), val); +} + +static struct macsec_flow *vsc8584_macsec_find_flow(struct macsec_context *ctx, + enum macsec_bank bank) +{ + struct vsc8531_private *priv = ctx->phydev->priv; + struct macsec_flow *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, &priv->macsec_flows, list) + if (pos->assoc_num == ctx->sa.assoc_num && pos->bank == bank) + return pos; + + return ERR_PTR(-ENOENT); +} + +static void vsc8584_macsec_flow_enable(struct phy_device *phydev, + struct macsec_flow *flow) +{ + enum macsec_bank bank = flow->bank; + u32 val, idx = flow->index; + + if ((flow->bank == MACSEC_INGR && flow->rx_sa && !flow->rx_sa->active) || + (flow->bank == MACSEC_EGR && flow->tx_sa && !flow->tx_sa->active)) + return; + + /* Enable */ + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_ENTRY_SET1, BIT(idx)); + + /* Set in-use */ + val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MS_SAM_FLOW_CTRL(idx)); + val |= MSCC_MS_SAM_FLOW_CTRL_SA_IN_USE; + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_FLOW_CTRL(idx), val); +} + +static void vsc8584_macsec_flow_disable(struct phy_device *phydev, + struct macsec_flow *flow) +{ + enum macsec_bank bank = flow->bank; + u32 val, idx = flow->index; + + /* Disable */ + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_ENTRY_CLEAR1, BIT(idx)); + + /* Clear in-use */ + val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MS_SAM_FLOW_CTRL(idx)); + val &= ~MSCC_MS_SAM_FLOW_CTRL_SA_IN_USE; + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_FLOW_CTRL(idx), val); +} + +static u32 vsc8584_macsec_flow_context_id(struct macsec_flow *flow) +{ + if (flow->bank == MACSEC_INGR) + return flow->index + MSCC_MS_MAX_FLOWS; + + return flow->index; +} + +/* Derive the AES key to get a key for the hash autentication */ +static int vsc8584_macsec_derive_key(const u8 key[MACSEC_KEYID_LEN], + u16 key_len, u8 hkey[16]) +{ + struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0); + struct skcipher_request *req = NULL; + struct scatterlist src, dst; + DECLARE_CRYPTO_WAIT(wait); + u32 input[4] = {0}; + int ret; + + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + req = skcipher_request_alloc(tfm, GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto out; + } + + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, + &wait); + ret = crypto_skcipher_setkey(tfm, key, key_len); + if (ret < 0) + goto out; + + sg_init_one(&src, input, 16); + sg_init_one(&dst, hkey, 16); + skcipher_request_set_crypt(req, &src, &dst, 16, NULL); + + ret = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); + +out: + skcipher_request_free(req); + crypto_free_skcipher(tfm); + return ret; +} + +static int vsc8584_macsec_transformation(struct phy_device *phydev, + struct macsec_flow *flow) +{ + struct vsc8531_private *priv = phydev->priv; + enum macsec_bank bank = flow->bank; + int i, ret, index = flow->index; + u32 rec = 0, control = 0; + u8 hkey[16]; + sci_t sci; + + ret = vsc8584_macsec_derive_key(flow->key, priv->secy->key_len, hkey); + if (ret) + return ret; + + switch (priv->secy->key_len) { + case 16: + control |= CONTROL_CRYPTO_ALG(CTRYPTO_ALG_AES_CTR_128); + break; + case 32: + control |= CONTROL_CRYPTO_ALG(CTRYPTO_ALG_AES_CTR_256); + break; + default: + return -EINVAL; + } + + control |= (bank == MACSEC_EGR) ? + (CONTROL_TYPE_EGRESS | CONTROL_AN(priv->secy->tx_sc.encoding_sa)) : + (CONTROL_TYPE_INGRESS | CONTROL_SEQ_MASK); + + control |= CONTROL_UPDATE_SEQ | CONTROL_ENCRYPT_AUTH | CONTROL_KEY_IN_CTX | + CONTROL_IV0 | CONTROL_IV1 | CONTROL_IV_IN_SEQ | + CONTROL_DIGEST_TYPE(0x2) | CONTROL_SEQ_TYPE(0x1) | + CONTROL_AUTH_ALG(AUTH_ALG_AES_GHAS) | CONTROL_CONTEXT_ID; + + /* Set the control word */ + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), + control); + + /* Set the context ID. Must be unique. */ + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), + vsc8584_macsec_flow_context_id(flow)); + + /* Set the encryption/decryption key */ + for (i = 0; i < priv->secy->key_len / sizeof(u32); i++) + vsc8584_macsec_phy_write(phydev, bank, + MSCC_MS_XFORM_REC(index, rec++), + ((u32 *)flow->key)[i]); + + /* Set the authentication key */ + for (i = 0; i < 4; i++) + vsc8584_macsec_phy_write(phydev, bank, + MSCC_MS_XFORM_REC(index, rec++), + ((u32 *)hkey)[i]); + + /* Initial sequence number */ + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), + bank == MACSEC_INGR ? + flow->rx_sa->next_pn : flow->tx_sa->next_pn); + + if (bank == MACSEC_INGR) + /* Set the mask (replay window size) */ + vsc8584_macsec_phy_write(phydev, bank, + MSCC_MS_XFORM_REC(index, rec++), + priv->secy->replay_window); + + /* Set the input vectors */ + sci = bank == MACSEC_INGR ? flow->rx_sa->sc->sci : priv->secy->sci; + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), + lower_32_bits(sci)); + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), + upper_32_bits(sci)); + + while (rec < 20) + vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), + 0); + + flow->has_transformation = true; + return 0; +} + +static struct macsec_flow *vsc8584_macsec_alloc_flow(struct vsc8531_private *priv, + enum macsec_bank bank) +{ + unsigned long *bitmap = bank == MACSEC_INGR ? + &priv->ingr_flows : &priv->egr_flows; + struct macsec_flow *flow; + int index; + + index = find_first_zero_bit(bitmap, MSCC_MS_MAX_FLOWS); + + if (index == MSCC_MS_MAX_FLOWS) + return ERR_PTR(-ENOMEM); + + flow = kzalloc(sizeof(*flow), GFP_KERNEL); + if (!flow) + return ERR_PTR(-ENOMEM); + + set_bit(index, bitmap); + flow->index = index; + flow->bank = bank; + flow->priority = 8; + flow->assoc_num = -1; + + list_add_tail(&flow->list, &priv->macsec_flows); + return flow; +} + +static void vsc8584_macsec_free_flow(struct vsc8531_private *priv, + struct macsec_flow *flow) +{ + unsigned long *bitmap = flow->bank == MACSEC_INGR ? + &priv->ingr_flows : &priv->egr_flows; + + list_del(&flow->list); + clear_bit(flow->index, bitmap); + kfree(flow); +} + +static int vsc8584_macsec_add_flow(struct phy_device *phydev, + struct macsec_flow *flow, bool update) +{ + int ret; + + flow->port = MSCC_MS_PORT_CONTROLLED; + vsc8584_macsec_flow(phydev, flow); + + if (update) + return 0; + + ret = vsc8584_macsec_transformation(phydev, flow); + if (ret) { + vsc8584_macsec_free_flow(phydev->priv, flow); + return ret; + } + + return 0; +} + +static int vsc8584_macsec_default_flows(struct phy_device *phydev) +{ + struct macsec_flow *flow; + + /* Add a rule to let the MKA traffic go through, ingress */ + flow = vsc8584_macsec_alloc_flow(phydev->priv, MACSEC_INGR); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + flow->priority = 15; + flow->port = MSCC_MS_PORT_UNCONTROLLED; + flow->match.tagged = 1; + flow->match.untagged = 1; + flow->match.etype = 1; + flow->etype = ETH_P_PAE; + flow->action.bypass = 1; + + vsc8584_macsec_flow(phydev, flow); + vsc8584_macsec_flow_enable(phydev, flow); + + /* Add a rule to let the MKA traffic go through, egress */ + flow = vsc8584_macsec_alloc_flow(phydev->priv, MACSEC_EGR); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + flow->priority = 15; + flow->port = MSCC_MS_PORT_COMMON; + flow->match.untagged = 1; + flow->match.etype = 1; + flow->etype = ETH_P_PAE; + flow->action.bypass = 1; + + vsc8584_macsec_flow(phydev, flow); + vsc8584_macsec_flow_enable(phydev, flow); + + return 0; +} + +static void vsc8584_macsec_del_flow(struct phy_device *phydev, + struct macsec_flow *flow) +{ + vsc8584_macsec_flow_disable(phydev, flow); + vsc8584_macsec_free_flow(phydev->priv, flow); +} + +static int __vsc8584_macsec_add_rxsa(struct macsec_context *ctx, + struct macsec_flow *flow, bool update) +{ + struct phy_device *phydev = ctx->phydev; + struct vsc8531_private *priv = phydev->priv; + + if (!flow) { + flow = vsc8584_macsec_alloc_flow(priv, MACSEC_INGR); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + memcpy(flow->key, ctx->sa.key, priv->secy->key_len); + } + + flow->assoc_num = ctx->sa.assoc_num; + flow->rx_sa = ctx->sa.rx_sa; + + /* Always match tagged packets on ingress */ + flow->match.tagged = 1; + flow->match.sci = 1; + + if (priv->secy->validate_frames != MACSEC_VALIDATE_DISABLED) + flow->match.untagged = 1; + + return vsc8584_macsec_add_flow(phydev, flow, update); +} + +static int __vsc8584_macsec_add_txsa(struct macsec_context *ctx, + struct macsec_flow *flow, bool update) +{ + struct phy_device *phydev = ctx->phydev; + struct vsc8531_private *priv = phydev->priv; + + if (!flow) { + flow = vsc8584_macsec_alloc_flow(priv, MACSEC_EGR); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + memcpy(flow->key, ctx->sa.key, priv->secy->key_len); + } + + flow->assoc_num = ctx->sa.assoc_num; + flow->tx_sa = ctx->sa.tx_sa; + + /* Always match untagged packets on egress */ + flow->match.untagged = 1; + + return vsc8584_macsec_add_flow(phydev, flow, update); +} + +static int vsc8584_macsec_dev_open(struct macsec_context *ctx) +{ + struct vsc8531_private *priv = ctx->phydev->priv; + struct macsec_flow *flow, *tmp; + + /* No operation to perform before the commit step */ + if (ctx->prepare) + return 0; + + list_for_each_entry_safe(flow, tmp, &priv->macsec_flows, list) + vsc8584_macsec_flow_enable(ctx->phydev, flow); + + return 0; +} + +static int vsc8584_macsec_dev_stop(struct macsec_context *ctx) +{ + struct vsc8531_private *priv = ctx->phydev->priv; + struct macsec_flow *flow, *tmp; + + /* No operation to perform before the commit step */ + if (ctx->prepare) + return 0; + + list_for_each_entry_safe(flow, tmp, &priv->macsec_flows, list) + vsc8584_macsec_flow_disable(ctx->phydev, flow); + + return 0; +} + +static int vsc8584_macsec_add_secy(struct macsec_context *ctx) +{ + struct vsc8531_private *priv = ctx->phydev->priv; + struct macsec_secy *secy = ctx->secy; + + if (ctx->prepare) { + if (priv->secy) + return -EEXIST; + + return 0; + } + + priv->secy = secy; + + vsc8584_macsec_flow_default_action(ctx->phydev, MACSEC_EGR, + secy->validate_frames != MACSEC_VALIDATE_DISABLED); + vsc8584_macsec_flow_default_action(ctx->phydev, MACSEC_INGR, + secy->validate_frames != MACSEC_VALIDATE_DISABLED); + + return vsc8584_macsec_default_flows(ctx->phydev); +} + +static int vsc8584_macsec_del_secy(struct macsec_context *ctx) +{ + struct vsc8531_private *priv = ctx->phydev->priv; + struct macsec_flow *flow, *tmp; + + /* No operation to perform before the commit step */ + if (ctx->prepare) + return 0; + + list_for_each_entry_safe(flow, tmp, &priv->macsec_flows, list) + vsc8584_macsec_del_flow(ctx->phydev, flow); + + vsc8584_macsec_flow_default_action(ctx->phydev, MACSEC_EGR, false); + vsc8584_macsec_flow_default_action(ctx->phydev, MACSEC_INGR, false); + + priv->secy = NULL; + return 0; +} + +static int vsc8584_macsec_upd_secy(struct macsec_context *ctx) +{ + /* No operation to perform before the commit step */ + if (ctx->prepare) + return 0; + + vsc8584_macsec_del_secy(ctx); + return vsc8584_macsec_add_secy(ctx); +} + +static int vsc8584_macsec_add_rxsc(struct macsec_context *ctx) +{ + /* Nothing to do */ + return 0; +} + +static int vsc8584_macsec_upd_rxsc(struct macsec_context *ctx) +{ + return -EOPNOTSUPP; +} + +static int vsc8584_macsec_del_rxsc(struct macsec_context *ctx) +{ + struct vsc8531_private *priv = ctx->phydev->priv; + struct macsec_flow *flow, *tmp; + + /* No operation to perform before the commit step */ + if (ctx->prepare) + return 0; + + list_for_each_entry_safe(flow, tmp, &priv->macsec_flows, list) { + if (flow->bank == MACSEC_INGR && flow->rx_sa && + flow->rx_sa->sc->sci == ctx->rx_sc->sci) + vsc8584_macsec_del_flow(ctx->phydev, flow); + } + + return 0; +} + +static int vsc8584_macsec_add_rxsa(struct macsec_context *ctx) +{ + struct macsec_flow *flow = NULL; + + if (ctx->prepare) + return __vsc8584_macsec_add_rxsa(ctx, flow, false); + + flow = vsc8584_macsec_find_flow(ctx, MACSEC_INGR); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + vsc8584_macsec_flow_enable(ctx->phydev, flow); + return 0; +} + +static int vsc8584_macsec_upd_rxsa(struct macsec_context *ctx) +{ + struct macsec_flow *flow; + + flow = vsc8584_macsec_find_flow(ctx, MACSEC_INGR); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + if (ctx->prepare) { + /* Make sure the flow is disabled before updating it */ + vsc8584_macsec_flow_disable(ctx->phydev, flow); + + return __vsc8584_macsec_add_rxsa(ctx, flow, true); + } + + vsc8584_macsec_flow_enable(ctx->phydev, flow); + return 0; +} + +static int vsc8584_macsec_del_rxsa(struct macsec_context *ctx) +{ + struct macsec_flow *flow; + + flow = vsc8584_macsec_find_flow(ctx, MACSEC_INGR); + + if (IS_ERR(flow)) + return PTR_ERR(flow); + if (ctx->prepare) + return 0; + + vsc8584_macsec_del_flow(ctx->phydev, flow); + return 0; +} + +static int vsc8584_macsec_add_txsa(struct macsec_context *ctx) +{ + struct macsec_flow *flow = NULL; + + if (ctx->prepare) + return __vsc8584_macsec_add_txsa(ctx, flow, false); + + flow = vsc8584_macsec_find_flow(ctx, MACSEC_EGR); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + vsc8584_macsec_flow_enable(ctx->phydev, flow); + return 0; +} + +static int vsc8584_macsec_upd_txsa(struct macsec_context *ctx) +{ + struct macsec_flow *flow; + + flow = vsc8584_macsec_find_flow(ctx, MACSEC_EGR); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + if (ctx->prepare) { + /* Make sure the flow is disabled before updating it */ + vsc8584_macsec_flow_disable(ctx->phydev, flow); + + return __vsc8584_macsec_add_txsa(ctx, flow, true); + } + + vsc8584_macsec_flow_enable(ctx->phydev, flow); + return 0; +} + +static int vsc8584_macsec_del_txsa(struct macsec_context *ctx) +{ + struct macsec_flow *flow; + + flow = vsc8584_macsec_find_flow(ctx, MACSEC_EGR); + + if (IS_ERR(flow)) + return PTR_ERR(flow); + if (ctx->prepare) + return 0; + + vsc8584_macsec_del_flow(ctx->phydev, flow); + return 0; +} + +static struct macsec_ops vsc8584_macsec_ops = { + .mdo_dev_open = vsc8584_macsec_dev_open, + .mdo_dev_stop = vsc8584_macsec_dev_stop, + .mdo_add_secy = vsc8584_macsec_add_secy, + .mdo_upd_secy = vsc8584_macsec_upd_secy, + .mdo_del_secy = vsc8584_macsec_del_secy, + .mdo_add_rxsc = vsc8584_macsec_add_rxsc, + .mdo_upd_rxsc = vsc8584_macsec_upd_rxsc, + .mdo_del_rxsc = vsc8584_macsec_del_rxsc, + .mdo_add_rxsa = vsc8584_macsec_add_rxsa, + .mdo_upd_rxsa = vsc8584_macsec_upd_rxsa, + .mdo_del_rxsa = vsc8584_macsec_del_rxsa, + .mdo_add_txsa = vsc8584_macsec_add_txsa, + .mdo_upd_txsa = vsc8584_macsec_upd_txsa, + .mdo_del_txsa = vsc8584_macsec_del_txsa, +}; + +int vsc8584_macsec_init(struct phy_device *phydev) +{ + struct vsc8531_private *vsc8531 = phydev->priv; + + switch (phydev->phy_id & phydev->drv->phy_id_mask) { + case PHY_ID_VSC856X: + case PHY_ID_VSC8575: + case PHY_ID_VSC8582: + case PHY_ID_VSC8584: + INIT_LIST_HEAD(&vsc8531->macsec_flows); + vsc8531->secy = NULL; + + phydev->macsec_ops = &vsc8584_macsec_ops; + + return __vsc8584_macsec_init(phydev); + } + + return 0; +} + +void vsc8584_handle_macsec_interrupt(struct phy_device *phydev) +{ + struct vsc8531_private *priv = phydev->priv; + struct macsec_flow *flow, *tmp; + u32 cause, rec; + + /* Check MACsec PN rollover */ + cause = vsc8584_macsec_phy_read(phydev, MACSEC_EGR, + MSCC_MS_INTR_CTRL_STATUS); + cause &= MSCC_MS_INTR_CTRL_STATUS_INTR_CLR_STATUS_M; + if (!(cause & MACSEC_INTR_CTRL_STATUS_ROLLOVER)) + return; + + rec = 6 + priv->secy->key_len / sizeof(u32); + list_for_each_entry_safe(flow, tmp, &priv->macsec_flows, list) { + u32 val; + + if (flow->bank != MACSEC_EGR || !flow->has_transformation) + continue; + + val = vsc8584_macsec_phy_read(phydev, MACSEC_EGR, + MSCC_MS_XFORM_REC(flow->index, rec)); + if (val == 0xffffffff) { + vsc8584_macsec_flow_disable(phydev, flow); + macsec_pn_wrapped(priv->secy, flow->tx_sa); + return; + } + } +} + +void vsc8584_config_macsec_intr(struct phy_device *phydev) +{ + phy_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_EXTENDED_2); + phy_write(phydev, MSCC_PHY_EXTENDED_INT, MSCC_PHY_EXTENDED_INT_MS_EGR); + phy_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD); + + vsc8584_macsec_phy_write(phydev, MACSEC_EGR, MSCC_MS_AIC_CTRL, 0xf); + vsc8584_macsec_phy_write(phydev, MACSEC_EGR, MSCC_MS_INTR_CTRL_STATUS, + MSCC_MS_INTR_CTRL_STATUS_INTR_ENABLE(MACSEC_INTR_CTRL_STATUS_ROLLOVER)); +} diff --git a/drivers/net/phy/mscc_macsec.h b/drivers/net/phy/mscc/mscc_macsec.h index d9ab6aba7482..d0783944d106 100644 --- a/drivers/net/phy/mscc_macsec.h +++ b/drivers/net/phy/mscc/mscc_macsec.h @@ -1,12 +1,14 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* - * Microsemi Ocelot Switch driver + * Driver for Microsemi VSC85xx PHYs * * Copyright (c) 2018 Microsemi Corporation */ -#ifndef _MSCC_OCELOT_MACSEC_H_ -#define _MSCC_OCELOT_MACSEC_H_ +#ifndef _MSCC_PHY_MACSEC_H_ +#define _MSCC_PHY_MACSEC_H_ + +#include <net/macsec.h> #define MSCC_MS_MAX_FLOWS 16 @@ -58,6 +60,62 @@ enum mscc_macsec_validate_levels { MSCC_MS_VALIDATE_STRICT = 2, }; +enum macsec_bank { + FC_BUFFER = 0x04, + HOST_MAC = 0x05, + LINE_MAC = 0x06, + IP_1588 = 0x0e, + MACSEC_INGR = 0x38, + MACSEC_EGR = 0x3c, +}; + +struct macsec_flow { + struct list_head list; + enum mscc_macsec_destination_ports port; + enum macsec_bank bank; + u32 index; + int assoc_num; + bool has_transformation; + + /* Highest takes precedence [0..15] */ + u8 priority; + + u8 key[MACSEC_KEYID_LEN]; + + union { + struct macsec_rx_sa *rx_sa; + struct macsec_tx_sa *tx_sa; + }; + + /* Matching */ + struct { + u8 sci:1; + u8 tagged:1; + u8 untagged:1; + u8 etype:1; + } match; + + u16 etype; + + /* Action */ + struct { + u8 bypass:1; + u8 drop:1; + } action; +}; + +#define MSCC_EXT_PAGE_MACSEC_17 17 +#define MSCC_EXT_PAGE_MACSEC_18 18 + +#define MSCC_EXT_PAGE_MACSEC_19 19 +#define MSCC_PHY_MACSEC_19_REG_ADDR(x) (x) +#define MSCC_PHY_MACSEC_19_TARGET(x) ((x) << 12) +#define MSCC_PHY_MACSEC_19_READ BIT(14) +#define MSCC_PHY_MACSEC_19_CMD BIT(15) + +#define MSCC_EXT_PAGE_MACSEC_20 20 +#define MSCC_PHY_MACSEC_20_TARGET(x) (x) + #define MSCC_MS_XFORM_REC(x, y) (((x) << 5) + (y)) #define MSCC_MS_ENA_CFG 0x800 #define MSCC_MS_FC_CFG 0x804 @@ -263,4 +321,4 @@ enum mscc_macsec_validate_levels { #define MSCC_MS_INTR_CTRL_STATUS_INTR_ENABLE_M GENMASK(31, 16) #define MACSEC_INTR_CTRL_STATUS_ROLLOVER BIT(5) -#endif +#endif /* _MSCC_PHY_MACSEC_H_ */ diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc/mscc_main.c index b2eac7ee0288..19603081dd14 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -18,361 +18,7 @@ #include <linux/netdevice.h> #include <dt-bindings/net/mscc-phy-vsc8531.h> -#include <linux/scatterlist.h> -#include <crypto/skcipher.h> - -#if IS_ENABLED(CONFIG_MACSEC) -#include <net/macsec.h> -#endif - -#include "mscc_macsec.h" -#include "mscc_mac.h" -#include "mscc_fc_buffer.h" - -enum rgmii_rx_clock_delay { - RGMII_RX_CLK_DELAY_0_2_NS = 0, - RGMII_RX_CLK_DELAY_0_8_NS = 1, - RGMII_RX_CLK_DELAY_1_1_NS = 2, - RGMII_RX_CLK_DELAY_1_7_NS = 3, - RGMII_RX_CLK_DELAY_2_0_NS = 4, - RGMII_RX_CLK_DELAY_2_3_NS = 5, - RGMII_RX_CLK_DELAY_2_6_NS = 6, - RGMII_RX_CLK_DELAY_3_4_NS = 7 -}; - -/* Microsemi VSC85xx PHY registers */ -/* IEEE 802. Std Registers */ -#define MSCC_PHY_BYPASS_CONTROL 18 -#define DISABLE_HP_AUTO_MDIX_MASK 0x0080 -#define DISABLE_PAIR_SWAP_CORR_MASK 0x0020 -#define DISABLE_POLARITY_CORR_MASK 0x0010 -#define PARALLEL_DET_IGNORE_ADVERTISED 0x0008 - -#define MSCC_PHY_EXT_CNTL_STATUS 22 -#define SMI_BROADCAST_WR_EN 0x0001 - -#define MSCC_PHY_ERR_RX_CNT 19 -#define MSCC_PHY_ERR_FALSE_CARRIER_CNT 20 -#define MSCC_PHY_ERR_LINK_DISCONNECT_CNT 21 -#define ERR_CNT_MASK GENMASK(7, 0) - -#define MSCC_PHY_EXT_PHY_CNTL_1 23 -#define MAC_IF_SELECTION_MASK 0x1800 -#define MAC_IF_SELECTION_GMII 0 -#define MAC_IF_SELECTION_RMII 1 -#define MAC_IF_SELECTION_RGMII 2 -#define MAC_IF_SELECTION_POS 11 -#define VSC8584_MAC_IF_SELECTION_MASK 0x1000 -#define VSC8584_MAC_IF_SELECTION_SGMII 0 -#define VSC8584_MAC_IF_SELECTION_1000BASEX 1 -#define VSC8584_MAC_IF_SELECTION_POS 12 -#define FAR_END_LOOPBACK_MODE_MASK 0x0008 -#define MEDIA_OP_MODE_MASK 0x0700 -#define MEDIA_OP_MODE_COPPER 0 -#define MEDIA_OP_MODE_SERDES 1 -#define MEDIA_OP_MODE_1000BASEX 2 -#define MEDIA_OP_MODE_100BASEFX 3 -#define MEDIA_OP_MODE_AMS_COPPER_SERDES 5 -#define MEDIA_OP_MODE_AMS_COPPER_1000BASEX 6 -#define MEDIA_OP_MODE_AMS_COPPER_100BASEFX 7 -#define MEDIA_OP_MODE_POS 8 - -#define MSCC_PHY_EXT_PHY_CNTL_2 24 - -#define MII_VSC85XX_INT_MASK 25 -#define MII_VSC85XX_INT_MASK_MDINT BIT(15) -#define MII_VSC85XX_INT_MASK_LINK_CHG BIT(13) -#define MII_VSC85XX_INT_MASK_WOL BIT(6) -#define MII_VSC85XX_INT_MASK_EXT BIT(5) -#define MII_VSC85XX_INT_STATUS 26 - -#define MII_VSC85XX_INT_MASK_MASK (MII_VSC85XX_INT_MASK_MDINT | \ - MII_VSC85XX_INT_MASK_LINK_CHG | \ - MII_VSC85XX_INT_MASK_EXT) - -#define MSCC_PHY_WOL_MAC_CONTROL 27 -#define EDGE_RATE_CNTL_POS 5 -#define EDGE_RATE_CNTL_MASK 0x00E0 - -#define MSCC_PHY_DEV_AUX_CNTL 28 -#define HP_AUTO_MDIX_X_OVER_IND_MASK 0x2000 - -#define MSCC_PHY_LED_MODE_SEL 29 -#define LED_MODE_SEL_POS(x) ((x) * 4) -#define LED_MODE_SEL_MASK(x) (GENMASK(3, 0) << LED_MODE_SEL_POS(x)) -#define LED_MODE_SEL(x, mode) (((mode) << LED_MODE_SEL_POS(x)) & LED_MODE_SEL_MASK(x)) - -#define MSCC_EXT_PAGE_CSR_CNTL_17 17 -#define MSCC_EXT_PAGE_CSR_CNTL_18 18 - -#define MSCC_EXT_PAGE_CSR_CNTL_19 19 -#define MSCC_PHY_CSR_CNTL_19_REG_ADDR(x) (x) -#define MSCC_PHY_CSR_CNTL_19_TARGET(x) ((x) << 12) -#define MSCC_PHY_CSR_CNTL_19_READ BIT(14) -#define MSCC_PHY_CSR_CNTL_19_CMD BIT(15) - -#define MSCC_EXT_PAGE_CSR_CNTL_20 20 -#define MSCC_PHY_CSR_CNTL_20_TARGET(x) (x) - -#define PHY_MCB_TARGET 0x07 -#define PHY_MCB_S6G_WRITE BIT(31) -#define PHY_MCB_S6G_READ BIT(30) - -#define PHY_S6G_PLL5G_CFG0 0x06 -#define PHY_S6G_LCPLL_CFG 0x11 -#define PHY_S6G_PLL_CFG 0x2b -#define PHY_S6G_COMMON_CFG 0x2c -#define PHY_S6G_GPC_CFG 0x2e -#define PHY_S6G_MISC_CFG 0x3b -#define PHY_MCB_S6G_CFG 0x3f -#define PHY_S6G_DFT_CFG2 0x3e -#define PHY_S6G_PLL_STATUS 0x31 -#define PHY_S6G_IB_STATUS0 0x2f - -#define PHY_S6G_SYS_RST_POS 31 -#define PHY_S6G_ENA_LANE_POS 18 -#define PHY_S6G_ENA_LOOP_POS 8 -#define PHY_S6G_QRATE_POS 6 -#define PHY_S6G_IF_MODE_POS 4 -#define PHY_S6G_PLL_ENA_OFFS_POS 21 -#define PHY_S6G_PLL_FSM_CTRL_DATA_POS 8 -#define PHY_S6G_PLL_FSM_ENA_POS 7 - -#define MSCC_EXT_PAGE_MACSEC_17 17 -#define MSCC_EXT_PAGE_MACSEC_18 18 - -#define MSCC_EXT_PAGE_MACSEC_19 19 -#define MSCC_PHY_MACSEC_19_REG_ADDR(x) (x) -#define MSCC_PHY_MACSEC_19_TARGET(x) ((x) << 12) -#define MSCC_PHY_MACSEC_19_READ BIT(14) -#define MSCC_PHY_MACSEC_19_CMD BIT(15) - -#define MSCC_EXT_PAGE_MACSEC_20 20 -#define MSCC_PHY_MACSEC_20_TARGET(x) (x) -enum macsec_bank { - FC_BUFFER = 0x04, - HOST_MAC = 0x05, - LINE_MAC = 0x06, - IP_1588 = 0x0e, - MACSEC_INGR = 0x38, - MACSEC_EGR = 0x3c, -}; - -#define MSCC_EXT_PAGE_ACCESS 31 -#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ -#define MSCC_PHY_PAGE_EXTENDED 0x0001 /* Extended registers */ -#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */ -#define MSCC_PHY_PAGE_EXTENDED_3 0x0003 /* Extended reg - page 3 */ -#define MSCC_PHY_PAGE_EXTENDED_4 0x0004 /* Extended reg - page 4 */ -#define MSCC_PHY_PAGE_CSR_CNTL MSCC_PHY_PAGE_EXTENDED_4 -#define MSCC_PHY_PAGE_MACSEC MSCC_PHY_PAGE_EXTENDED_4 -/* Extended reg - GPIO; this is a bank of registers that are shared for all PHYs - * in the same package. - */ -#define MSCC_PHY_PAGE_EXTENDED_GPIO 0x0010 /* Extended reg - GPIO */ -#define MSCC_PHY_PAGE_TEST 0x2a30 /* Test reg */ -#define MSCC_PHY_PAGE_TR 0x52b5 /* Token ring registers */ - -/* Extended Page 1 Registers */ -#define MSCC_PHY_CU_MEDIA_CRC_VALID_CNT 18 -#define VALID_CRC_CNT_CRC_MASK GENMASK(13, 0) - -#define MSCC_PHY_EXT_MODE_CNTL 19 -#define FORCE_MDI_CROSSOVER_MASK 0x000C -#define FORCE_MDI_CROSSOVER_MDIX 0x000C -#define FORCE_MDI_CROSSOVER_MDI 0x0008 - -#define MSCC_PHY_ACTIPHY_CNTL 20 -#define PHY_ADDR_REVERSED 0x0200 -#define DOWNSHIFT_CNTL_MASK 0x001C -#define DOWNSHIFT_EN 0x0010 -#define DOWNSHIFT_CNTL_POS 2 - -#define MSCC_PHY_EXT_PHY_CNTL_4 23 -#define PHY_CNTL_4_ADDR_POS 11 - -#define MSCC_PHY_VERIPHY_CNTL_2 25 - -#define MSCC_PHY_VERIPHY_CNTL_3 26 - -/* Extended Page 2 Registers */ -#define MSCC_PHY_CU_PMD_TX_CNTL 16 - -#define MSCC_PHY_RGMII_CNTL 20 -#define RGMII_RX_CLK_DELAY_MASK 0x0070 -#define RGMII_RX_CLK_DELAY_POS 4 - -#define MSCC_PHY_WOL_LOWER_MAC_ADDR 21 -#define MSCC_PHY_WOL_MID_MAC_ADDR 22 -#define MSCC_PHY_WOL_UPPER_MAC_ADDR 23 -#define MSCC_PHY_WOL_LOWER_PASSWD 24 -#define MSCC_PHY_WOL_MID_PASSWD 25 -#define MSCC_PHY_WOL_UPPER_PASSWD 26 - -#define MSCC_PHY_WOL_MAC_CONTROL 27 -#define SECURE_ON_ENABLE 0x8000 -#define SECURE_ON_PASSWD_LEN_4 0x4000 - -#define MSCC_PHY_EXTENDED_INT 28 -#define MSCC_PHY_EXTENDED_INT_MS_EGR BIT(9) - -/* Extended Page 3 Registers */ -#define MSCC_PHY_SERDES_TX_VALID_CNT 21 -#define MSCC_PHY_SERDES_TX_CRC_ERR_CNT 22 -#define MSCC_PHY_SERDES_RX_VALID_CNT 28 -#define MSCC_PHY_SERDES_RX_CRC_ERR_CNT 29 - -/* Extended page GPIO Registers */ -#define MSCC_DW8051_CNTL_STATUS 0 -#define MICRO_NSOFT_RESET 0x8000 -#define RUN_FROM_INT_ROM 0x4000 -#define AUTOINC_ADDR 0x2000 -#define PATCH_RAM_CLK 0x1000 -#define MICRO_PATCH_EN 0x0080 -#define DW8051_CLK_EN 0x0010 -#define MICRO_CLK_EN 0x0008 -#define MICRO_CLK_DIVIDE(x) ((x) >> 1) -#define MSCC_DW8051_VLD_MASK 0xf1ff - -/* x Address in range 1-4 */ -#define MSCC_TRAP_ROM_ADDR(x) ((x) * 2 + 1) -#define MSCC_PATCH_RAM_ADDR(x) (((x) + 1) * 2) -#define MSCC_INT_MEM_ADDR 11 - -#define MSCC_INT_MEM_CNTL 12 -#define READ_SFR 0x6000 -#define READ_PRAM 0x4000 -#define READ_ROM 0x2000 -#define READ_RAM 0x0000 -#define INT_MEM_WRITE_EN 0x1000 -#define EN_PATCH_RAM_TRAP_ADDR(x) (0x0100 << ((x) - 1)) -#define INT_MEM_DATA_M 0x00ff -#define INT_MEM_DATA(x) (INT_MEM_DATA_M & (x)) - -#define MSCC_PHY_PROC_CMD 18 -#define PROC_CMD_NCOMPLETED 0x8000 -#define PROC_CMD_FAILED 0x4000 -#define PROC_CMD_SGMII_PORT(x) ((x) << 8) -#define PROC_CMD_FIBER_PORT(x) (0x0100 << (x) % 4) -#define PROC_CMD_QSGMII_PORT 0x0c00 -#define PROC_CMD_RST_CONF_PORT 0x0080 -#define PROC_CMD_RECONF_PORT 0x0000 -#define PROC_CMD_READ_MOD_WRITE_PORT 0x0040 -#define PROC_CMD_WRITE 0x0040 -#define PROC_CMD_READ 0x0000 -#define PROC_CMD_FIBER_DISABLE 0x0020 -#define PROC_CMD_FIBER_100BASE_FX 0x0010 -#define PROC_CMD_FIBER_1000BASE_X 0x0000 -#define PROC_CMD_SGMII_MAC 0x0030 -#define PROC_CMD_QSGMII_MAC 0x0020 -#define PROC_CMD_NO_MAC_CONF 0x0000 -#define PROC_CMD_1588_DEFAULT_INIT 0x0010 -#define PROC_CMD_NOP 0x000f -#define PROC_CMD_PHY_INIT 0x000a -#define PROC_CMD_CRC16 0x0008 -#define PROC_CMD_FIBER_MEDIA_CONF 0x0001 -#define PROC_CMD_MCB_ACCESS_MAC_CONF 0x0000 -#define PROC_CMD_NCOMPLETED_TIMEOUT_MS 500 - -#define MSCC_PHY_MAC_CFG_FASTLINK 19 -#define MAC_CFG_MASK 0xc000 -#define MAC_CFG_SGMII 0x0000 -#define MAC_CFG_QSGMII 0x4000 - -/* Test page Registers */ -#define MSCC_PHY_TEST_PAGE_5 5 -#define MSCC_PHY_TEST_PAGE_8 8 -#define MSCC_PHY_TEST_PAGE_9 9 -#define MSCC_PHY_TEST_PAGE_20 20 -#define MSCC_PHY_TEST_PAGE_24 24 - -/* Token ring page Registers */ -#define MSCC_PHY_TR_CNTL 16 -#define TR_WRITE 0x8000 -#define TR_ADDR(x) (0x7fff & (x)) -#define MSCC_PHY_TR_LSB 17 -#define MSCC_PHY_TR_MSB 18 - -/* Microsemi PHY ID's - * Code assumes lowest nibble is 0 - */ -#define PHY_ID_VSC8504 0x000704c0 -#define PHY_ID_VSC8514 0x00070670 -#define PHY_ID_VSC8530 0x00070560 -#define PHY_ID_VSC8531 0x00070570 -#define PHY_ID_VSC8540 0x00070760 -#define PHY_ID_VSC8541 0x00070770 -#define PHY_ID_VSC8552 0x000704e0 -#define PHY_ID_VSC856X 0x000707e0 -#define PHY_ID_VSC8572 0x000704d0 -#define PHY_ID_VSC8574 0x000704a0 -#define PHY_ID_VSC8575 0x000707d0 -#define PHY_ID_VSC8582 0x000707b0 -#define PHY_ID_VSC8584 0x000707c0 - -#define MSCC_VDDMAC_1500 1500 -#define MSCC_VDDMAC_1800 1800 -#define MSCC_VDDMAC_2500 2500 -#define MSCC_VDDMAC_3300 3300 - -#define DOWNSHIFT_COUNT_MAX 5 - -#define MAX_LEDS 4 - -#define VSC8584_SUPP_LED_MODES (BIT(VSC8531_LINK_ACTIVITY) | \ - BIT(VSC8531_LINK_1000_ACTIVITY) | \ - BIT(VSC8531_LINK_100_ACTIVITY) | \ - BIT(VSC8531_LINK_10_ACTIVITY) | \ - BIT(VSC8531_LINK_100_1000_ACTIVITY) | \ - BIT(VSC8531_LINK_10_1000_ACTIVITY) | \ - BIT(VSC8531_LINK_10_100_ACTIVITY) | \ - BIT(VSC8584_LINK_100FX_1000X_ACTIVITY) | \ - BIT(VSC8531_DUPLEX_COLLISION) | \ - BIT(VSC8531_COLLISION) | \ - BIT(VSC8531_ACTIVITY) | \ - BIT(VSC8584_100FX_1000X_ACTIVITY) | \ - BIT(VSC8531_AUTONEG_FAULT) | \ - BIT(VSC8531_SERIAL_MODE) | \ - BIT(VSC8531_FORCE_LED_OFF) | \ - BIT(VSC8531_FORCE_LED_ON)) - -#define VSC85XX_SUPP_LED_MODES (BIT(VSC8531_LINK_ACTIVITY) | \ - BIT(VSC8531_LINK_1000_ACTIVITY) | \ - BIT(VSC8531_LINK_100_ACTIVITY) | \ - BIT(VSC8531_LINK_10_ACTIVITY) | \ - BIT(VSC8531_LINK_100_1000_ACTIVITY) | \ - BIT(VSC8531_LINK_10_1000_ACTIVITY) | \ - BIT(VSC8531_LINK_10_100_ACTIVITY) | \ - BIT(VSC8531_DUPLEX_COLLISION) | \ - BIT(VSC8531_COLLISION) | \ - BIT(VSC8531_ACTIVITY) | \ - BIT(VSC8531_AUTONEG_FAULT) | \ - BIT(VSC8531_SERIAL_MODE) | \ - BIT(VSC8531_FORCE_LED_OFF) | \ - BIT(VSC8531_FORCE_LED_ON)) - -#define MSCC_VSC8584_REVB_INT8051_FW "microchip/mscc_vsc8584_revb_int8051_fb48.bin" -#define MSCC_VSC8584_REVB_INT8051_FW_START_ADDR 0xe800 -#define MSCC_VSC8584_REVB_INT8051_FW_CRC 0xfb48 - -#define MSCC_VSC8574_REVB_INT8051_FW "microchip/mscc_vsc8574_revb_int8051_29e8.bin" -#define MSCC_VSC8574_REVB_INT8051_FW_START_ADDR 0x4000 -#define MSCC_VSC8574_REVB_INT8051_FW_CRC 0x29e8 - -#define VSC8584_REVB 0x0001 -#define MSCC_DEV_REV_MASK GENMASK(3, 0) - -struct reg_val { - u16 reg; - u32 val; -}; - -struct vsc85xx_hw_stat { - const char *string; - u8 reg; - u16 page; - u16 mask; -}; +#include "mscc.h" static const struct vsc85xx_hw_stat vsc85xx_hw_stats[] = { { @@ -452,85 +98,14 @@ static const struct vsc85xx_hw_stat vsc8584_hw_stats[] = { }, }; -#if IS_ENABLED(CONFIG_MACSEC) -struct macsec_flow { - struct list_head list; - enum mscc_macsec_destination_ports port; - enum macsec_bank bank; - u32 index; - int assoc_num; - bool has_transformation; - - /* Highest takes precedence [0..15] */ - u8 priority; - - u8 key[MACSEC_KEYID_LEN]; - - union { - struct macsec_rx_sa *rx_sa; - struct macsec_tx_sa *tx_sa; - }; - - /* Matching */ - struct { - u8 sci:1; - u8 tagged:1; - u8 untagged:1; - u8 etype:1; - } match; - - u16 etype; - - /* Action */ - struct { - u8 bypass:1; - u8 drop:1; - } action; - -}; -#endif - -struct vsc8531_private { - int rate_magic; - u16 supp_led_modes; - u32 leds_mode[MAX_LEDS]; - u8 nleds; - const struct vsc85xx_hw_stat *hw_stats; - u64 *stats; - int nstats; - bool pkg_init; - /* For multiple port PHYs; the MDIO address of the base PHY in the - * package. - */ - unsigned int base_addr; - -#if IS_ENABLED(CONFIG_MACSEC) - /* MACsec fields: - * - One SecY per device (enforced at the s/w implementation level) - * - macsec_flows: list of h/w flows - * - ingr_flows: bitmap of ingress flows - * - egr_flows: bitmap of egress flows - */ - struct macsec_secy *secy; - struct list_head macsec_flows; - unsigned long ingr_flows; - unsigned long egr_flows; -#endif -}; - #ifdef CONFIG_OF_MDIO -struct vsc8531_edge_rate_table { - u32 vddmac; - u32 slowdown[8]; -}; - static const struct vsc8531_edge_rate_table edge_table[] = { {MSCC_VDDMAC_3300, { 0, 2, 4, 7, 10, 17, 29, 53} }, {MSCC_VDDMAC_2500, { 0, 3, 6, 10, 14, 23, 37, 63} }, {MSCC_VDDMAC_1800, { 0, 5, 9, 16, 23, 35, 52, 76} }, {MSCC_VDDMAC_1500, { 0, 6, 14, 21, 29, 42, 58, 77} }, }; -#endif /* CONFIG_OF_MDIO */ +#endif static int vsc85xx_phy_read_page(struct phy_device *phydev) { @@ -916,6 +491,9 @@ static int vsc85xx_mac_if_set(struct phy_device *phydev, reg_val = phy_read(phydev, MSCC_PHY_EXT_PHY_CNTL_1); reg_val &= ~(MAC_IF_SELECTION_MASK); switch (interface) { + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII: reg_val |= (MAC_IF_SELECTION_RGMII << MAC_IF_SELECTION_POS); break; @@ -944,16 +522,26 @@ out_unlock: static int vsc85xx_default_config(struct phy_device *phydev) { + u16 reg_val = 0; int rc; - u16 reg_val; phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + + if (!phy_interface_mode_is_rgmii(phydev->interface)) + return 0; + mutex_lock(&phydev->lock); - reg_val = RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS; + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) + reg_val |= RGMII_CLK_DELAY_2_0_NS << RGMII_RX_CLK_DELAY_POS; + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) + reg_val |= RGMII_CLK_DELAY_2_0_NS << RGMII_TX_CLK_DELAY_POS; rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2, - MSCC_PHY_RGMII_CNTL, RGMII_RX_CLK_DELAY_MASK, + MSCC_PHY_RGMII_CNTL, + RGMII_RX_CLK_DELAY_MASK | RGMII_TX_CLK_DELAY_MASK, reg_val); mutex_unlock(&phydev->lock); @@ -1676,978 +1264,6 @@ out: return ret; } -#if IS_ENABLED(CONFIG_MACSEC) -static u32 vsc8584_macsec_phy_read(struct phy_device *phydev, - enum macsec_bank bank, u32 reg) -{ - u32 val, val_l = 0, val_h = 0; - unsigned long deadline; - int rc; - - rc = phy_select_page(phydev, MSCC_PHY_PAGE_MACSEC); - if (rc < 0) - goto failed; - - __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_20, - MSCC_PHY_MACSEC_20_TARGET(bank >> 2)); - - if (bank >> 2 == 0x1) - /* non-MACsec access */ - bank &= 0x3; - else - bank = 0; - - __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_19, - MSCC_PHY_MACSEC_19_CMD | MSCC_PHY_MACSEC_19_READ | - MSCC_PHY_MACSEC_19_REG_ADDR(reg) | - MSCC_PHY_MACSEC_19_TARGET(bank)); - - deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); - do { - val = __phy_read(phydev, MSCC_EXT_PAGE_MACSEC_19); - } while (time_before(jiffies, deadline) && !(val & MSCC_PHY_MACSEC_19_CMD)); - - val_l = __phy_read(phydev, MSCC_EXT_PAGE_MACSEC_17); - val_h = __phy_read(phydev, MSCC_EXT_PAGE_MACSEC_18); - -failed: - phy_restore_page(phydev, rc, rc); - - return (val_h << 16) | val_l; -} - -static void vsc8584_macsec_phy_write(struct phy_device *phydev, - enum macsec_bank bank, u32 reg, u32 val) -{ - unsigned long deadline; - int rc; - - rc = phy_select_page(phydev, MSCC_PHY_PAGE_MACSEC); - if (rc < 0) - goto failed; - - __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_20, - MSCC_PHY_MACSEC_20_TARGET(bank >> 2)); - - if ((bank >> 2 == 0x1) || (bank >> 2 == 0x3)) - bank &= 0x3; - else - /* MACsec access */ - bank = 0; - - __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_17, (u16)val); - __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_18, (u16)(val >> 16)); - - __phy_write(phydev, MSCC_EXT_PAGE_MACSEC_19, - MSCC_PHY_MACSEC_19_CMD | MSCC_PHY_MACSEC_19_REG_ADDR(reg) | - MSCC_PHY_MACSEC_19_TARGET(bank)); - - deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); - do { - val = __phy_read(phydev, MSCC_EXT_PAGE_MACSEC_19); - } while (time_before(jiffies, deadline) && !(val & MSCC_PHY_MACSEC_19_CMD)); - -failed: - phy_restore_page(phydev, rc, rc); -} - -static void vsc8584_macsec_classification(struct phy_device *phydev, - enum macsec_bank bank) -{ - /* enable VLAN tag parsing */ - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_CP_TAG, - MSCC_MS_SAM_CP_TAG_PARSE_STAG | - MSCC_MS_SAM_CP_TAG_PARSE_QTAG | - MSCC_MS_SAM_CP_TAG_PARSE_QINQ); -} - -static void vsc8584_macsec_flow_default_action(struct phy_device *phydev, - enum macsec_bank bank, - bool block) -{ - u32 port = (bank == MACSEC_INGR) ? - MSCC_MS_PORT_UNCONTROLLED : MSCC_MS_PORT_COMMON; - u32 action = MSCC_MS_FLOW_BYPASS; - - if (block) - action = MSCC_MS_FLOW_DROP; - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_NM_FLOW_NCP, - /* MACsec untagged */ - MSCC_MS_SAM_NM_FLOW_NCP_UNTAGGED_FLOW_TYPE(action) | - MSCC_MS_SAM_NM_FLOW_NCP_UNTAGGED_DROP_ACTION(MSCC_MS_ACTION_DROP) | - MSCC_MS_SAM_NM_FLOW_NCP_UNTAGGED_DEST_PORT(port) | - /* MACsec tagged */ - MSCC_MS_SAM_NM_FLOW_NCP_TAGGED_FLOW_TYPE(action) | - MSCC_MS_SAM_NM_FLOW_NCP_TAGGED_DROP_ACTION(MSCC_MS_ACTION_DROP) | - MSCC_MS_SAM_NM_FLOW_NCP_TAGGED_DEST_PORT(port) | - /* Bad tag */ - MSCC_MS_SAM_NM_FLOW_NCP_BADTAG_FLOW_TYPE(action) | - MSCC_MS_SAM_NM_FLOW_NCP_BADTAG_DROP_ACTION(MSCC_MS_ACTION_DROP) | - MSCC_MS_SAM_NM_FLOW_NCP_BADTAG_DEST_PORT(port) | - /* Kay tag */ - MSCC_MS_SAM_NM_FLOW_NCP_KAY_FLOW_TYPE(action) | - MSCC_MS_SAM_NM_FLOW_NCP_KAY_DROP_ACTION(MSCC_MS_ACTION_DROP) | - MSCC_MS_SAM_NM_FLOW_NCP_KAY_DEST_PORT(port)); - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_NM_FLOW_CP, - /* MACsec untagged */ - MSCC_MS_SAM_NM_FLOW_NCP_UNTAGGED_FLOW_TYPE(action) | - MSCC_MS_SAM_NM_FLOW_CP_UNTAGGED_DROP_ACTION(MSCC_MS_ACTION_DROP) | - MSCC_MS_SAM_NM_FLOW_CP_UNTAGGED_DEST_PORT(port) | - /* MACsec tagged */ - MSCC_MS_SAM_NM_FLOW_NCP_TAGGED_FLOW_TYPE(action) | - MSCC_MS_SAM_NM_FLOW_CP_TAGGED_DROP_ACTION(MSCC_MS_ACTION_DROP) | - MSCC_MS_SAM_NM_FLOW_CP_TAGGED_DEST_PORT(port) | - /* Bad tag */ - MSCC_MS_SAM_NM_FLOW_NCP_BADTAG_FLOW_TYPE(action) | - MSCC_MS_SAM_NM_FLOW_CP_BADTAG_DROP_ACTION(MSCC_MS_ACTION_DROP) | - MSCC_MS_SAM_NM_FLOW_CP_BADTAG_DEST_PORT(port) | - /* Kay tag */ - MSCC_MS_SAM_NM_FLOW_NCP_KAY_FLOW_TYPE(action) | - MSCC_MS_SAM_NM_FLOW_CP_KAY_DROP_ACTION(MSCC_MS_ACTION_DROP) | - MSCC_MS_SAM_NM_FLOW_CP_KAY_DEST_PORT(port)); -} - -static void vsc8584_macsec_integrity_checks(struct phy_device *phydev, - enum macsec_bank bank) -{ - u32 val; - - if (bank != MACSEC_INGR) - return; - - /* Set default rules to pass unmatched frames */ - val = vsc8584_macsec_phy_read(phydev, bank, - MSCC_MS_PARAMS2_IG_CC_CONTROL); - val |= MSCC_MS_PARAMS2_IG_CC_CONTROL_NON_MATCH_CTRL_ACT | - MSCC_MS_PARAMS2_IG_CC_CONTROL_NON_MATCH_ACT; - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_PARAMS2_IG_CC_CONTROL, - val); - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_PARAMS2_IG_CP_TAG, - MSCC_MS_PARAMS2_IG_CP_TAG_PARSE_STAG | - MSCC_MS_PARAMS2_IG_CP_TAG_PARSE_QTAG | - MSCC_MS_PARAMS2_IG_CP_TAG_PARSE_QINQ); -} - -static void vsc8584_macsec_block_init(struct phy_device *phydev, - enum macsec_bank bank) -{ - u32 val; - int i; - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_ENA_CFG, - MSCC_MS_ENA_CFG_SW_RST | - MSCC_MS_ENA_CFG_MACSEC_BYPASS_ENA); - - /* Set the MACsec block out of s/w reset and enable clocks */ - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_ENA_CFG, - MSCC_MS_ENA_CFG_CLK_ENA); - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_STATUS_CONTEXT_CTRL, - bank == MACSEC_INGR ? 0xe5880214 : 0xe5880218); - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_MISC_CONTROL, - MSCC_MS_MISC_CONTROL_MC_LATENCY_FIX(bank == MACSEC_INGR ? 57 : 40) | - MSCC_MS_MISC_CONTROL_XFORM_REC_SIZE(bank == MACSEC_INGR ? 1 : 2)); - - /* Clear the counters */ - val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MS_COUNT_CONTROL); - val |= MSCC_MS_COUNT_CONTROL_AUTO_CNTR_RESET; - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_COUNT_CONTROL, val); - - /* Enable octet increment mode */ - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_PP_CTRL, - MSCC_MS_PP_CTRL_MACSEC_OCTET_INCR_MODE); - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_BLOCK_CTX_UPDATE, 0x3); - - val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MS_COUNT_CONTROL); - val |= MSCC_MS_COUNT_CONTROL_RESET_ALL; - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_COUNT_CONTROL, val); - - /* Set the MTU */ - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_NON_VLAN_MTU_CHECK, - MSCC_MS_NON_VLAN_MTU_CHECK_NV_MTU_COMPARE(32761) | - MSCC_MS_NON_VLAN_MTU_CHECK_NV_MTU_COMP_DROP); - - for (i = 0; i < 8; i++) - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_VLAN_MTU_CHECK(i), - MSCC_MS_VLAN_MTU_CHECK_MTU_COMPARE(32761) | - MSCC_MS_VLAN_MTU_CHECK_MTU_COMP_DROP); - - if (bank == MACSEC_EGR) { - val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MS_INTR_CTRL_STATUS); - val &= ~MSCC_MS_INTR_CTRL_STATUS_INTR_ENABLE_M; - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_INTR_CTRL_STATUS, val); - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_FC_CFG, - MSCC_MS_FC_CFG_FCBUF_ENA | - MSCC_MS_FC_CFG_LOW_THRESH(0x1) | - MSCC_MS_FC_CFG_HIGH_THRESH(0x4) | - MSCC_MS_FC_CFG_LOW_BYTES_VAL(0x4) | - MSCC_MS_FC_CFG_HIGH_BYTES_VAL(0x6)); - } - - vsc8584_macsec_classification(phydev, bank); - vsc8584_macsec_flow_default_action(phydev, bank, false); - vsc8584_macsec_integrity_checks(phydev, bank); - - /* Enable the MACsec block */ - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_ENA_CFG, - MSCC_MS_ENA_CFG_CLK_ENA | - MSCC_MS_ENA_CFG_MACSEC_ENA | - MSCC_MS_ENA_CFG_MACSEC_SPEED_MODE(0x5)); -} - -static void vsc8584_macsec_mac_init(struct phy_device *phydev, - enum macsec_bank bank) -{ - u32 val; - int i; - - /* Clear host & line stats */ - for (i = 0; i < 36; i++) - vsc8584_macsec_phy_write(phydev, bank, 0x1c + i, 0); - - val = vsc8584_macsec_phy_read(phydev, bank, - MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL); - val &= ~MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL_PAUSE_MODE_M; - val |= MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL_PAUSE_MODE(2) | - MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL_PAUSE_VALUE(0xffff); - vsc8584_macsec_phy_write(phydev, bank, - MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL, val); - - val = vsc8584_macsec_phy_read(phydev, bank, - MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL_2); - val |= 0xffff; - vsc8584_macsec_phy_write(phydev, bank, - MSCC_MAC_PAUSE_CFG_TX_FRAME_CTRL_2, val); - - val = vsc8584_macsec_phy_read(phydev, bank, - MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL); - if (bank == HOST_MAC) - val |= MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_PAUSE_TIMER_ENA | - MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_PAUSE_FRAME_DROP_ENA; - else - val |= MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_PAUSE_REACT_ENA | - MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_PAUSE_FRAME_DROP_ENA | - MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_PAUSE_MODE | - MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL_EARLY_PAUSE_DETECT_ENA; - vsc8584_macsec_phy_write(phydev, bank, - MSCC_MAC_PAUSE_CFG_RX_FRAME_CTRL, val); - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_PKTINF_CFG, - MSCC_MAC_CFG_PKTINF_CFG_STRIP_FCS_ENA | - MSCC_MAC_CFG_PKTINF_CFG_INSERT_FCS_ENA | - MSCC_MAC_CFG_PKTINF_CFG_LPI_RELAY_ENA | - MSCC_MAC_CFG_PKTINF_CFG_STRIP_PREAMBLE_ENA | - MSCC_MAC_CFG_PKTINF_CFG_INSERT_PREAMBLE_ENA | - (bank == HOST_MAC ? - MSCC_MAC_CFG_PKTINF_CFG_ENABLE_TX_PADDING : 0)); - - val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MAC_CFG_MODE_CFG); - val &= ~MSCC_MAC_CFG_MODE_CFG_DISABLE_DIC; - vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_MODE_CFG, val); - - val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MAC_CFG_MAXLEN_CFG); - val &= ~MSCC_MAC_CFG_MAXLEN_CFG_MAX_LEN_M; - val |= MSCC_MAC_CFG_MAXLEN_CFG_MAX_LEN(10240); - vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_MAXLEN_CFG, val); - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_ADV_CHK_CFG, - MSCC_MAC_CFG_ADV_CHK_CFG_SFD_CHK_ENA | - MSCC_MAC_CFG_ADV_CHK_CFG_PRM_CHK_ENA | - MSCC_MAC_CFG_ADV_CHK_CFG_OOR_ERR_ENA | - MSCC_MAC_CFG_ADV_CHK_CFG_INR_ERR_ENA); - - val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MAC_CFG_LFS_CFG); - val &= ~MSCC_MAC_CFG_LFS_CFG_LFS_MODE_ENA; - vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_LFS_CFG, val); - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MAC_CFG_ENA_CFG, - MSCC_MAC_CFG_ENA_CFG_RX_CLK_ENA | - MSCC_MAC_CFG_ENA_CFG_TX_CLK_ENA | - MSCC_MAC_CFG_ENA_CFG_RX_ENA | - MSCC_MAC_CFG_ENA_CFG_TX_ENA); -} - -/* Must be called with mdio_lock taken */ -static int vsc8584_macsec_init(struct phy_device *phydev) -{ - u32 val; - - vsc8584_macsec_block_init(phydev, MACSEC_INGR); - vsc8584_macsec_block_init(phydev, MACSEC_EGR); - vsc8584_macsec_mac_init(phydev, HOST_MAC); - vsc8584_macsec_mac_init(phydev, LINE_MAC); - - vsc8584_macsec_phy_write(phydev, FC_BUFFER, - MSCC_FCBUF_FC_READ_THRESH_CFG, - MSCC_FCBUF_FC_READ_THRESH_CFG_TX_THRESH(4) | - MSCC_FCBUF_FC_READ_THRESH_CFG_RX_THRESH(5)); - - val = vsc8584_macsec_phy_read(phydev, FC_BUFFER, MSCC_FCBUF_MODE_CFG); - val |= MSCC_FCBUF_MODE_CFG_PAUSE_GEN_ENA | - MSCC_FCBUF_MODE_CFG_RX_PPM_RATE_ADAPT_ENA | - MSCC_FCBUF_MODE_CFG_TX_PPM_RATE_ADAPT_ENA; - vsc8584_macsec_phy_write(phydev, FC_BUFFER, MSCC_FCBUF_MODE_CFG, val); - - vsc8584_macsec_phy_write(phydev, FC_BUFFER, MSCC_FCBUF_PPM_RATE_ADAPT_THRESH_CFG, - MSCC_FCBUF_PPM_RATE_ADAPT_THRESH_CFG_TX_THRESH(8) | - MSCC_FCBUF_PPM_RATE_ADAPT_THRESH_CFG_TX_OFFSET(9)); - - val = vsc8584_macsec_phy_read(phydev, FC_BUFFER, - MSCC_FCBUF_TX_DATA_QUEUE_CFG); - val &= ~(MSCC_FCBUF_TX_DATA_QUEUE_CFG_START_M | - MSCC_FCBUF_TX_DATA_QUEUE_CFG_END_M); - val |= MSCC_FCBUF_TX_DATA_QUEUE_CFG_START(0) | - MSCC_FCBUF_TX_DATA_QUEUE_CFG_END(5119); - vsc8584_macsec_phy_write(phydev, FC_BUFFER, - MSCC_FCBUF_TX_DATA_QUEUE_CFG, val); - - val = vsc8584_macsec_phy_read(phydev, FC_BUFFER, MSCC_FCBUF_ENA_CFG); - val |= MSCC_FCBUF_ENA_CFG_TX_ENA | MSCC_FCBUF_ENA_CFG_RX_ENA; - vsc8584_macsec_phy_write(phydev, FC_BUFFER, MSCC_FCBUF_ENA_CFG, val); - - val = vsc8584_macsec_phy_read(phydev, IP_1588, - MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL); - val &= ~MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE_M; - val |= MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL_PROTOCOL_MODE(4); - vsc8584_macsec_phy_write(phydev, IP_1588, - MSCC_PROC_0_IP_1588_TOP_CFG_STAT_MODE_CTL, val); - - return 0; -} - -static void vsc8584_macsec_flow(struct phy_device *phydev, - struct macsec_flow *flow) -{ - struct vsc8531_private *priv = phydev->priv; - enum macsec_bank bank = flow->bank; - u32 val, match = 0, mask = 0, action = 0, idx = flow->index; - - if (flow->match.tagged) - match |= MSCC_MS_SAM_MISC_MATCH_TAGGED; - if (flow->match.untagged) - match |= MSCC_MS_SAM_MISC_MATCH_UNTAGGED; - - if (bank == MACSEC_INGR && flow->assoc_num >= 0) { - match |= MSCC_MS_SAM_MISC_MATCH_AN(flow->assoc_num); - mask |= MSCC_MS_SAM_MASK_AN_MASK(0x3); - } - - if (bank == MACSEC_INGR && flow->match.sci && flow->rx_sa->sc->sci) { - match |= MSCC_MS_SAM_MISC_MATCH_TCI(BIT(3)); - mask |= MSCC_MS_SAM_MASK_TCI_MASK(BIT(3)) | - MSCC_MS_SAM_MASK_SCI_MASK; - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_MATCH_SCI_LO(idx), - lower_32_bits(flow->rx_sa->sc->sci)); - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_MATCH_SCI_HI(idx), - upper_32_bits(flow->rx_sa->sc->sci)); - } - - if (flow->match.etype) { - mask |= MSCC_MS_SAM_MASK_MAC_ETYPE_MASK; - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_MAC_SA_MATCH_HI(idx), - MSCC_MS_SAM_MAC_SA_MATCH_HI_ETYPE(htons(flow->etype))); - } - - match |= MSCC_MS_SAM_MISC_MATCH_PRIORITY(flow->priority); - - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_MISC_MATCH(idx), match); - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_MASK(idx), mask); - - /* Action for matching packets */ - if (flow->action.drop) - action = MSCC_MS_FLOW_DROP; - else if (flow->action.bypass || flow->port == MSCC_MS_PORT_UNCONTROLLED) - action = MSCC_MS_FLOW_BYPASS; - else - action = (bank == MACSEC_INGR) ? - MSCC_MS_FLOW_INGRESS : MSCC_MS_FLOW_EGRESS; - - val = MSCC_MS_SAM_FLOW_CTRL_FLOW_TYPE(action) | - MSCC_MS_SAM_FLOW_CTRL_DROP_ACTION(MSCC_MS_ACTION_DROP) | - MSCC_MS_SAM_FLOW_CTRL_DEST_PORT(flow->port); - - if (action == MSCC_MS_FLOW_BYPASS) - goto write_ctrl; - - if (bank == MACSEC_INGR) { - if (priv->secy->replay_protect) - val |= MSCC_MS_SAM_FLOW_CTRL_REPLAY_PROTECT; - if (priv->secy->validate_frames == MACSEC_VALIDATE_STRICT) - val |= MSCC_MS_SAM_FLOW_CTRL_VALIDATE_FRAMES(MSCC_MS_VALIDATE_STRICT); - else if (priv->secy->validate_frames == MACSEC_VALIDATE_CHECK) - val |= MSCC_MS_SAM_FLOW_CTRL_VALIDATE_FRAMES(MSCC_MS_VALIDATE_CHECK); - } else if (bank == MACSEC_EGR) { - if (priv->secy->protect_frames) - val |= MSCC_MS_SAM_FLOW_CTRL_PROTECT_FRAME; - if (priv->secy->tx_sc.encrypt) - val |= MSCC_MS_SAM_FLOW_CTRL_CONF_PROTECT; - if (priv->secy->tx_sc.send_sci) - val |= MSCC_MS_SAM_FLOW_CTRL_INCLUDE_SCI; - } - -write_ctrl: - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_FLOW_CTRL(idx), val); -} - -static struct macsec_flow *vsc8584_macsec_find_flow(struct macsec_context *ctx, - enum macsec_bank bank) -{ - struct vsc8531_private *priv = ctx->phydev->priv; - struct macsec_flow *pos, *tmp; - - list_for_each_entry_safe(pos, tmp, &priv->macsec_flows, list) - if (pos->assoc_num == ctx->sa.assoc_num && pos->bank == bank) - return pos; - - return ERR_PTR(-ENOENT); -} - -static void vsc8584_macsec_flow_enable(struct phy_device *phydev, - struct macsec_flow *flow) -{ - enum macsec_bank bank = flow->bank; - u32 val, idx = flow->index; - - if ((flow->bank == MACSEC_INGR && flow->rx_sa && !flow->rx_sa->active) || - (flow->bank == MACSEC_EGR && flow->tx_sa && !flow->tx_sa->active)) - return; - - /* Enable */ - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_ENTRY_SET1, BIT(idx)); - - /* Set in-use */ - val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MS_SAM_FLOW_CTRL(idx)); - val |= MSCC_MS_SAM_FLOW_CTRL_SA_IN_USE; - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_FLOW_CTRL(idx), val); -} - -static void vsc8584_macsec_flow_disable(struct phy_device *phydev, - struct macsec_flow *flow) -{ - enum macsec_bank bank = flow->bank; - u32 val, idx = flow->index; - - /* Disable */ - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_ENTRY_CLEAR1, BIT(idx)); - - /* Clear in-use */ - val = vsc8584_macsec_phy_read(phydev, bank, MSCC_MS_SAM_FLOW_CTRL(idx)); - val &= ~MSCC_MS_SAM_FLOW_CTRL_SA_IN_USE; - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_SAM_FLOW_CTRL(idx), val); -} - -static u32 vsc8584_macsec_flow_context_id(struct macsec_flow *flow) -{ - if (flow->bank == MACSEC_INGR) - return flow->index + MSCC_MS_MAX_FLOWS; - - return flow->index; -} - -/* Derive the AES key to get a key for the hash autentication */ -static int vsc8584_macsec_derive_key(const u8 key[MACSEC_KEYID_LEN], - u16 key_len, u8 hkey[16]) -{ - struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0); - struct skcipher_request *req = NULL; - struct scatterlist src, dst; - DECLARE_CRYPTO_WAIT(wait); - u32 input[4] = {0}; - int ret; - - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - req = skcipher_request_alloc(tfm, GFP_KERNEL); - if (!req) { - ret = -ENOMEM; - goto out; - } - - skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, - &wait); - ret = crypto_skcipher_setkey(tfm, key, key_len); - if (ret < 0) - goto out; - - sg_init_one(&src, input, 16); - sg_init_one(&dst, hkey, 16); - skcipher_request_set_crypt(req, &src, &dst, 16, NULL); - - ret = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); - -out: - skcipher_request_free(req); - crypto_free_skcipher(tfm); - return ret; -} - -static int vsc8584_macsec_transformation(struct phy_device *phydev, - struct macsec_flow *flow) -{ - struct vsc8531_private *priv = phydev->priv; - enum macsec_bank bank = flow->bank; - int i, ret, index = flow->index; - u32 rec = 0, control = 0; - u8 hkey[16]; - sci_t sci; - - ret = vsc8584_macsec_derive_key(flow->key, priv->secy->key_len, hkey); - if (ret) - return ret; - - switch (priv->secy->key_len) { - case 16: - control |= CONTROL_CRYPTO_ALG(CTRYPTO_ALG_AES_CTR_128); - break; - case 32: - control |= CONTROL_CRYPTO_ALG(CTRYPTO_ALG_AES_CTR_256); - break; - default: - return -EINVAL; - } - - control |= (bank == MACSEC_EGR) ? - (CONTROL_TYPE_EGRESS | CONTROL_AN(priv->secy->tx_sc.encoding_sa)) : - (CONTROL_TYPE_INGRESS | CONTROL_SEQ_MASK); - - control |= CONTROL_UPDATE_SEQ | CONTROL_ENCRYPT_AUTH | CONTROL_KEY_IN_CTX | - CONTROL_IV0 | CONTROL_IV1 | CONTROL_IV_IN_SEQ | - CONTROL_DIGEST_TYPE(0x2) | CONTROL_SEQ_TYPE(0x1) | - CONTROL_AUTH_ALG(AUTH_ALG_AES_GHAS) | CONTROL_CONTEXT_ID; - - /* Set the control word */ - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), - control); - - /* Set the context ID. Must be unique. */ - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), - vsc8584_macsec_flow_context_id(flow)); - - /* Set the encryption/decryption key */ - for (i = 0; i < priv->secy->key_len / sizeof(u32); i++) - vsc8584_macsec_phy_write(phydev, bank, - MSCC_MS_XFORM_REC(index, rec++), - ((u32 *)flow->key)[i]); - - /* Set the authentication key */ - for (i = 0; i < 4; i++) - vsc8584_macsec_phy_write(phydev, bank, - MSCC_MS_XFORM_REC(index, rec++), - ((u32 *)hkey)[i]); - - /* Initial sequence number */ - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), - bank == MACSEC_INGR ? - flow->rx_sa->next_pn : flow->tx_sa->next_pn); - - if (bank == MACSEC_INGR) - /* Set the mask (replay window size) */ - vsc8584_macsec_phy_write(phydev, bank, - MSCC_MS_XFORM_REC(index, rec++), - priv->secy->replay_window); - - /* Set the input vectors */ - sci = bank == MACSEC_INGR ? flow->rx_sa->sc->sci : priv->secy->sci; - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), - lower_32_bits(sci)); - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), - upper_32_bits(sci)); - - while (rec < 20) - vsc8584_macsec_phy_write(phydev, bank, MSCC_MS_XFORM_REC(index, rec++), - 0); - - flow->has_transformation = true; - return 0; -} - -static struct macsec_flow *vsc8584_macsec_alloc_flow(struct vsc8531_private *priv, - enum macsec_bank bank) -{ - unsigned long *bitmap = bank == MACSEC_INGR ? - &priv->ingr_flows : &priv->egr_flows; - struct macsec_flow *flow; - int index; - - index = find_first_zero_bit(bitmap, MSCC_MS_MAX_FLOWS); - - if (index == MSCC_MS_MAX_FLOWS) - return ERR_PTR(-ENOMEM); - - flow = kzalloc(sizeof(*flow), GFP_KERNEL); - if (!flow) - return ERR_PTR(-ENOMEM); - - set_bit(index, bitmap); - flow->index = index; - flow->bank = bank; - flow->priority = 8; - flow->assoc_num = -1; - - list_add_tail(&flow->list, &priv->macsec_flows); - return flow; -} - -static void vsc8584_macsec_free_flow(struct vsc8531_private *priv, - struct macsec_flow *flow) -{ - unsigned long *bitmap = flow->bank == MACSEC_INGR ? - &priv->ingr_flows : &priv->egr_flows; - - list_del(&flow->list); - clear_bit(flow->index, bitmap); - kfree(flow); -} - -static int vsc8584_macsec_add_flow(struct phy_device *phydev, - struct macsec_flow *flow, bool update) -{ - int ret; - - flow->port = MSCC_MS_PORT_CONTROLLED; - vsc8584_macsec_flow(phydev, flow); - - if (update) - return 0; - - ret = vsc8584_macsec_transformation(phydev, flow); - if (ret) { - vsc8584_macsec_free_flow(phydev->priv, flow); - return ret; - } - - return 0; -} - -static int vsc8584_macsec_default_flows(struct phy_device *phydev) -{ - struct macsec_flow *flow; - - /* Add a rule to let the MKA traffic go through, ingress */ - flow = vsc8584_macsec_alloc_flow(phydev->priv, MACSEC_INGR); - if (IS_ERR(flow)) - return PTR_ERR(flow); - - flow->priority = 15; - flow->port = MSCC_MS_PORT_UNCONTROLLED; - flow->match.tagged = 1; - flow->match.untagged = 1; - flow->match.etype = 1; - flow->etype = ETH_P_PAE; - flow->action.bypass = 1; - - vsc8584_macsec_flow(phydev, flow); - vsc8584_macsec_flow_enable(phydev, flow); - - /* Add a rule to let the MKA traffic go through, egress */ - flow = vsc8584_macsec_alloc_flow(phydev->priv, MACSEC_EGR); - if (IS_ERR(flow)) - return PTR_ERR(flow); - - flow->priority = 15; - flow->port = MSCC_MS_PORT_COMMON; - flow->match.untagged = 1; - flow->match.etype = 1; - flow->etype = ETH_P_PAE; - flow->action.bypass = 1; - - vsc8584_macsec_flow(phydev, flow); - vsc8584_macsec_flow_enable(phydev, flow); - - return 0; -} - -static void vsc8584_macsec_del_flow(struct phy_device *phydev, - struct macsec_flow *flow) -{ - vsc8584_macsec_flow_disable(phydev, flow); - vsc8584_macsec_free_flow(phydev->priv, flow); -} - -static int __vsc8584_macsec_add_rxsa(struct macsec_context *ctx, - struct macsec_flow *flow, bool update) -{ - struct phy_device *phydev = ctx->phydev; - struct vsc8531_private *priv = phydev->priv; - - if (!flow) { - flow = vsc8584_macsec_alloc_flow(priv, MACSEC_INGR); - if (IS_ERR(flow)) - return PTR_ERR(flow); - - memcpy(flow->key, ctx->sa.key, priv->secy->key_len); - } - - flow->assoc_num = ctx->sa.assoc_num; - flow->rx_sa = ctx->sa.rx_sa; - - /* Always match tagged packets on ingress */ - flow->match.tagged = 1; - flow->match.sci = 1; - - if (priv->secy->validate_frames != MACSEC_VALIDATE_DISABLED) - flow->match.untagged = 1; - - return vsc8584_macsec_add_flow(phydev, flow, update); -} - -static int __vsc8584_macsec_add_txsa(struct macsec_context *ctx, - struct macsec_flow *flow, bool update) -{ - struct phy_device *phydev = ctx->phydev; - struct vsc8531_private *priv = phydev->priv; - - if (!flow) { - flow = vsc8584_macsec_alloc_flow(priv, MACSEC_EGR); - if (IS_ERR(flow)) - return PTR_ERR(flow); - - memcpy(flow->key, ctx->sa.key, priv->secy->key_len); - } - - flow->assoc_num = ctx->sa.assoc_num; - flow->tx_sa = ctx->sa.tx_sa; - - /* Always match untagged packets on egress */ - flow->match.untagged = 1; - - return vsc8584_macsec_add_flow(phydev, flow, update); -} - -static int vsc8584_macsec_dev_open(struct macsec_context *ctx) -{ - struct vsc8531_private *priv = ctx->phydev->priv; - struct macsec_flow *flow, *tmp; - - /* No operation to perform before the commit step */ - if (ctx->prepare) - return 0; - - list_for_each_entry_safe(flow, tmp, &priv->macsec_flows, list) - vsc8584_macsec_flow_enable(ctx->phydev, flow); - - return 0; -} - -static int vsc8584_macsec_dev_stop(struct macsec_context *ctx) -{ - struct vsc8531_private *priv = ctx->phydev->priv; - struct macsec_flow *flow, *tmp; - - /* No operation to perform before the commit step */ - if (ctx->prepare) - return 0; - - list_for_each_entry_safe(flow, tmp, &priv->macsec_flows, list) - vsc8584_macsec_flow_disable(ctx->phydev, flow); - - return 0; -} - -static int vsc8584_macsec_add_secy(struct macsec_context *ctx) -{ - struct vsc8531_private *priv = ctx->phydev->priv; - struct macsec_secy *secy = ctx->secy; - - if (ctx->prepare) { - if (priv->secy) - return -EEXIST; - - return 0; - } - - priv->secy = secy; - - vsc8584_macsec_flow_default_action(ctx->phydev, MACSEC_EGR, - secy->validate_frames != MACSEC_VALIDATE_DISABLED); - vsc8584_macsec_flow_default_action(ctx->phydev, MACSEC_INGR, - secy->validate_frames != MACSEC_VALIDATE_DISABLED); - - return vsc8584_macsec_default_flows(ctx->phydev); -} - -static int vsc8584_macsec_del_secy(struct macsec_context *ctx) -{ - struct vsc8531_private *priv = ctx->phydev->priv; - struct macsec_flow *flow, *tmp; - - /* No operation to perform before the commit step */ - if (ctx->prepare) - return 0; - - list_for_each_entry_safe(flow, tmp, &priv->macsec_flows, list) - vsc8584_macsec_del_flow(ctx->phydev, flow); - - vsc8584_macsec_flow_default_action(ctx->phydev, MACSEC_EGR, false); - vsc8584_macsec_flow_default_action(ctx->phydev, MACSEC_INGR, false); - - priv->secy = NULL; - return 0; -} - -static int vsc8584_macsec_upd_secy(struct macsec_context *ctx) -{ - /* No operation to perform before the commit step */ - if (ctx->prepare) - return 0; - - vsc8584_macsec_del_secy(ctx); - return vsc8584_macsec_add_secy(ctx); -} - -static int vsc8584_macsec_add_rxsc(struct macsec_context *ctx) -{ - /* Nothing to do */ - return 0; -} - -static int vsc8584_macsec_upd_rxsc(struct macsec_context *ctx) -{ - return -EOPNOTSUPP; -} - -static int vsc8584_macsec_del_rxsc(struct macsec_context *ctx) -{ - struct vsc8531_private *priv = ctx->phydev->priv; - struct macsec_flow *flow, *tmp; - - /* No operation to perform before the commit step */ - if (ctx->prepare) - return 0; - - list_for_each_entry_safe(flow, tmp, &priv->macsec_flows, list) { - if (flow->bank == MACSEC_INGR && flow->rx_sa && - flow->rx_sa->sc->sci == ctx->rx_sc->sci) - vsc8584_macsec_del_flow(ctx->phydev, flow); - } - - return 0; -} - -static int vsc8584_macsec_add_rxsa(struct macsec_context *ctx) -{ - struct macsec_flow *flow = NULL; - - if (ctx->prepare) - return __vsc8584_macsec_add_rxsa(ctx, flow, false); - - flow = vsc8584_macsec_find_flow(ctx, MACSEC_INGR); - if (IS_ERR(flow)) - return PTR_ERR(flow); - - vsc8584_macsec_flow_enable(ctx->phydev, flow); - return 0; -} - -static int vsc8584_macsec_upd_rxsa(struct macsec_context *ctx) -{ - struct macsec_flow *flow; - - flow = vsc8584_macsec_find_flow(ctx, MACSEC_INGR); - if (IS_ERR(flow)) - return PTR_ERR(flow); - - if (ctx->prepare) { - /* Make sure the flow is disabled before updating it */ - vsc8584_macsec_flow_disable(ctx->phydev, flow); - - return __vsc8584_macsec_add_rxsa(ctx, flow, true); - } - - vsc8584_macsec_flow_enable(ctx->phydev, flow); - return 0; -} - -static int vsc8584_macsec_del_rxsa(struct macsec_context *ctx) -{ - struct macsec_flow *flow; - - flow = vsc8584_macsec_find_flow(ctx, MACSEC_INGR); - - if (IS_ERR(flow)) - return PTR_ERR(flow); - if (ctx->prepare) - return 0; - - vsc8584_macsec_del_flow(ctx->phydev, flow); - return 0; -} - -static int vsc8584_macsec_add_txsa(struct macsec_context *ctx) -{ - struct macsec_flow *flow = NULL; - - if (ctx->prepare) - return __vsc8584_macsec_add_txsa(ctx, flow, false); - - flow = vsc8584_macsec_find_flow(ctx, MACSEC_EGR); - if (IS_ERR(flow)) - return PTR_ERR(flow); - - vsc8584_macsec_flow_enable(ctx->phydev, flow); - return 0; -} - -static int vsc8584_macsec_upd_txsa(struct macsec_context *ctx) -{ - struct macsec_flow *flow; - - flow = vsc8584_macsec_find_flow(ctx, MACSEC_EGR); - if (IS_ERR(flow)) - return PTR_ERR(flow); - - if (ctx->prepare) { - /* Make sure the flow is disabled before updating it */ - vsc8584_macsec_flow_disable(ctx->phydev, flow); - - return __vsc8584_macsec_add_txsa(ctx, flow, true); - } - - vsc8584_macsec_flow_enable(ctx->phydev, flow); - return 0; -} - -static int vsc8584_macsec_del_txsa(struct macsec_context *ctx) -{ - struct macsec_flow *flow; - - flow = vsc8584_macsec_find_flow(ctx, MACSEC_EGR); - - if (IS_ERR(flow)) - return PTR_ERR(flow); - if (ctx->prepare) - return 0; - - vsc8584_macsec_del_flow(ctx->phydev, flow); - return 0; -} - -static struct macsec_ops vsc8584_macsec_ops = { - .mdo_dev_open = vsc8584_macsec_dev_open, - .mdo_dev_stop = vsc8584_macsec_dev_stop, - .mdo_add_secy = vsc8584_macsec_add_secy, - .mdo_upd_secy = vsc8584_macsec_upd_secy, - .mdo_del_secy = vsc8584_macsec_del_secy, - .mdo_add_rxsc = vsc8584_macsec_add_rxsc, - .mdo_upd_rxsc = vsc8584_macsec_upd_rxsc, - .mdo_del_rxsc = vsc8584_macsec_del_rxsc, - .mdo_add_rxsa = vsc8584_macsec_add_rxsa, - .mdo_upd_rxsa = vsc8584_macsec_upd_rxsa, - .mdo_del_rxsa = vsc8584_macsec_del_rxsa, - .mdo_add_txsa = vsc8584_macsec_add_txsa, - .mdo_upd_txsa = vsc8584_macsec_upd_txsa, - .mdo_del_txsa = vsc8584_macsec_del_txsa, -}; -#endif /* CONFIG_MACSEC */ - /* Check if one PHY has already done the init of the parts common to all PHYs * in the Quad PHY package. */ @@ -2685,6 +1301,32 @@ static bool vsc8584_is_pkg_init(struct phy_device *phydev, bool reversed) return false; } +static void vsc8584_rgmii_set_skews(struct phy_device *phydev) +{ + u32 skew_rx, skew_tx; + + /* We first set the Rx and Tx skews to their default value in h/w + * (0.2 ns). + */ + skew_rx = VSC8584_RGMII_SKEW_0_2; + skew_tx = VSC8584_RGMII_SKEW_0_2; + + /* We then set the skews based on the interface mode. */ + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) + skew_rx = VSC8584_RGMII_SKEW_2_0; + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) + skew_tx = VSC8584_RGMII_SKEW_2_0; + + /* Finally we apply the skews configuration. */ + phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2, + MSCC_PHY_RGMII_SETTINGS, + (0x7 << RGMII_SKEW_RX_POS) | (0x7 << RGMII_SKEW_TX_POS), + (skew_rx << RGMII_SKEW_RX_POS) | + (skew_tx << RGMII_SKEW_TX_POS)); +} + static int vsc8584_config_init(struct phy_device *phydev) { struct vsc8531_private *vsc8531 = phydev->priv; @@ -2757,27 +1399,35 @@ static int vsc8584_config_init(struct phy_device *phydev) val = phy_base_read(phydev, MSCC_PHY_MAC_CFG_FASTLINK); val &= ~MAC_CFG_MASK; - if (phydev->interface == PHY_INTERFACE_MODE_QSGMII) + if (phydev->interface == PHY_INTERFACE_MODE_QSGMII) { val |= MAC_CFG_QSGMII; - else + } else if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { val |= MAC_CFG_SGMII; + } else if (phy_interface_is_rgmii(phydev)) { + val |= MAC_CFG_RGMII; + } else { + ret = -EINVAL; + goto err; + } ret = phy_base_write(phydev, MSCC_PHY_MAC_CFG_FASTLINK, val); if (ret) goto err; - val = PROC_CMD_MCB_ACCESS_MAC_CONF | PROC_CMD_RST_CONF_PORT | - PROC_CMD_READ_MOD_WRITE_PORT; - if (phydev->interface == PHY_INTERFACE_MODE_QSGMII) - val |= PROC_CMD_QSGMII_MAC; - else - val |= PROC_CMD_SGMII_MAC; + if (!phy_interface_is_rgmii(phydev)) { + val = PROC_CMD_MCB_ACCESS_MAC_CONF | PROC_CMD_RST_CONF_PORT | + PROC_CMD_READ_MOD_WRITE_PORT; + if (phydev->interface == PHY_INTERFACE_MODE_QSGMII) + val |= PROC_CMD_QSGMII_MAC; + else + val |= PROC_CMD_SGMII_MAC; - ret = vsc8584_cmd(phydev, val); - if (ret) - goto err; + ret = vsc8584_cmd(phydev, val); + if (ret) + goto err; - usleep_range(10000, 20000); + usleep_range(10000, 20000); + } /* Disable SerDes for 100Base-FX */ ret = vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF | @@ -2797,23 +1447,9 @@ static int vsc8584_config_init(struct phy_device *phydev) mutex_unlock(&phydev->mdio.bus->mdio_lock); -#if IS_ENABLED(CONFIG_MACSEC) - /* MACsec */ - switch (phydev->phy_id & phydev->drv->phy_id_mask) { - case PHY_ID_VSC856X: - case PHY_ID_VSC8575: - case PHY_ID_VSC8582: - case PHY_ID_VSC8584: - INIT_LIST_HEAD(&vsc8531->macsec_flows); - vsc8531->secy = NULL; - - phydev->macsec_ops = &vsc8584_macsec_ops; - - ret = vsc8584_macsec_init(phydev); - if (ret) - goto err; - } -#endif + ret = vsc8584_macsec_init(phydev); + if (ret) + return ret; phy_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD); @@ -2822,6 +1458,11 @@ static int vsc8584_config_init(struct phy_device *phydev) val |= (MEDIA_OP_MODE_COPPER << MEDIA_OP_MODE_POS) | (VSC8584_MAC_IF_SELECTION_SGMII << VSC8584_MAC_IF_SELECTION_POS); ret = phy_write(phydev, MSCC_PHY_EXT_PHY_CNTL_1, val); + if (ret) + return ret; + + if (phy_interface_is_rgmii(phydev)) + vsc8584_rgmii_set_skews(phydev); ret = genphy_soft_reset(phydev); if (ret) @@ -2840,41 +1481,21 @@ err: return ret; } -static int vsc8584_handle_interrupt(struct phy_device *phydev) +static irqreturn_t vsc8584_handle_interrupt(struct phy_device *phydev) { -#if IS_ENABLED(CONFIG_MACSEC) - struct vsc8531_private *priv = phydev->priv; - struct macsec_flow *flow, *tmp; - u32 cause, rec; - - /* Check MACsec PN rollover */ - cause = vsc8584_macsec_phy_read(phydev, MACSEC_EGR, - MSCC_MS_INTR_CTRL_STATUS); - cause &= MSCC_MS_INTR_CTRL_STATUS_INTR_CLR_STATUS_M; - if (!(cause & MACSEC_INTR_CTRL_STATUS_ROLLOVER)) - goto skip_rollover; + int irq_status; - rec = 6 + priv->secy->key_len / sizeof(u32); - list_for_each_entry_safe(flow, tmp, &priv->macsec_flows, list) { - u32 val; + irq_status = phy_read(phydev, MII_VSC85XX_INT_STATUS); + if (irq_status < 0 || !(irq_status & MII_VSC85XX_INT_MASK_MASK)) + return IRQ_NONE; - if (flow->bank != MACSEC_EGR || !flow->has_transformation) - continue; - - val = vsc8584_macsec_phy_read(phydev, MACSEC_EGR, - MSCC_MS_XFORM_REC(flow->index, rec)); - if (val == 0xffffffff) { - vsc8584_macsec_flow_disable(phydev, flow); - macsec_pn_wrapped(priv->secy, flow->tx_sa); - break; - } - } + if (irq_status & MII_VSC85XX_INT_MASK_EXT) + vsc8584_handle_macsec_interrupt(phydev); -skip_rollover: -#endif + if (irq_status & MII_VSC85XX_INT_MASK_LINK_CHG) + phy_mac_interrupt(phydev); - phy_mac_interrupt(phydev); - return 0; + return IRQ_HANDLED; } static int vsc85xx_config_init(struct phy_device *phydev) @@ -3320,20 +1941,8 @@ static int vsc85xx_config_intr(struct phy_device *phydev) int rc; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { -#if IS_ENABLED(CONFIG_MACSEC) - phy_write(phydev, MSCC_EXT_PAGE_ACCESS, - MSCC_PHY_PAGE_EXTENDED_2); - phy_write(phydev, MSCC_PHY_EXTENDED_INT, - MSCC_PHY_EXTENDED_INT_MS_EGR); - phy_write(phydev, MSCC_EXT_PAGE_ACCESS, - MSCC_PHY_PAGE_STANDARD); - - vsc8584_macsec_phy_write(phydev, MACSEC_EGR, - MSCC_MS_AIC_CTRL, 0xf); - vsc8584_macsec_phy_write(phydev, MACSEC_EGR, - MSCC_MS_INTR_CTRL_STATUS, - MSCC_MS_INTR_CTRL_STATUS_INTR_ENABLE(MACSEC_INTR_CTRL_STATUS_ROLLOVER)); -#endif + vsc8584_config_macsec_intr(phydev); + rc = phy_write(phydev, MII_VSC85XX_INT_MASK, MII_VSC85XX_INT_MASK_MASK); } else { @@ -3481,6 +2090,30 @@ static int vsc85xx_probe(struct phy_device *phydev) /* Microsemi VSC85xx PHYs */ static struct phy_driver vsc85xx_driver[] = { { + .phy_id = PHY_ID_VSC8502, + .name = "Microsemi GE VSC8502 SyncE", + .phy_id_mask = 0xfffffff0, + /* PHY_BASIC_FEATURES */ + .soft_reset = &genphy_soft_reset, + .config_init = &vsc85xx_config_init, + .config_aneg = &vsc85xx_config_aneg, + .read_status = &vsc85xx_read_status, + .ack_interrupt = &vsc85xx_ack_interrupt, + .config_intr = &vsc85xx_config_intr, + .suspend = &genphy_suspend, + .resume = &genphy_resume, + .probe = &vsc85xx_probe, + .set_wol = &vsc85xx_wol_set, + .get_wol = &vsc85xx_wol_get, + .get_tunable = &vsc85xx_get_tunable, + .set_tunable = &vsc85xx_set_tunable, + .read_page = &vsc85xx_phy_read_page, + .write_page = &vsc85xx_phy_write_page, + .get_sset_count = &vsc85xx_get_sset_count, + .get_strings = &vsc85xx_get_strings, + .get_stats = &vsc85xx_get_stats, +}, +{ .phy_id = PHY_ID_VSC8504, .name = "Microsemi GE VSC8504 SyncE", .phy_id_mask = 0xfffffff0, diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c index b705d0bd798b..47caae770ffc 100644 --- a/drivers/net/phy/nxp-tja11xx.c +++ b/drivers/net/phy/nxp-tja11xx.c @@ -72,20 +72,10 @@ static struct tja11xx_phy_stats tja11xx_hw_stats[] = { static int tja11xx_check(struct phy_device *phydev, u8 reg, u16 mask, u16 set) { - int i, ret; - - for (i = 0; i < 200; i++) { - ret = phy_read(phydev, reg); - if (ret < 0) - return ret; - - if ((ret & mask) == set) - return 0; - - usleep_range(100, 150); - } + int val; - return -ETIMEDOUT; + return phy_read_poll_timeout(phydev, reg, val, (val & mask) == set, + 150, 30000, false); } static int phy_modify_check(struct phy_device *phydev, u8 reg, diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index e083e7a76ada..66b8c61ca74c 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -329,6 +329,44 @@ void phy_resolve_aneg_linkmode(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(phy_resolve_aneg_linkmode); +/** + * phy_check_downshift - check whether downshift occurred + * @phydev: The phy_device struct + * + * Check whether a downshift to a lower speed occurred. If this should be the + * case warn the user. + * Prerequisite for detecting downshift is that PHY driver implements the + * read_status callback and sets phydev->speed to the actual link speed. + */ +void phy_check_downshift(struct phy_device *phydev) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(common); + int i, speed = SPEED_UNKNOWN; + + phydev->downshifted_rate = 0; + + if (phydev->autoneg == AUTONEG_DISABLE || + phydev->speed == SPEED_UNKNOWN) + return; + + linkmode_and(common, phydev->lp_advertising, phydev->advertising); + + for (i = 0; i < ARRAY_SIZE(settings); i++) + if (test_bit(settings[i].bit, common)) { + speed = settings[i].speed; + break; + } + + if (speed == SPEED_UNKNOWN || phydev->speed >= speed) + return; + + phydev_warn(phydev, "Downshift occurred from negotiated speed %s to actual speed %s, check cabling!\n", + phy_speed_to_str(speed), phy_speed_to_str(phydev->speed)); + + phydev->downshifted_rate = 1; +} +EXPORT_SYMBOL_GPL(phy_check_downshift); + static int phy_resolve_min_speed(struct phy_device *phydev, bool fdx_only) { __ETHTOOL_DECLARE_LINK_MODE_MASK(common); @@ -489,37 +527,6 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) EXPORT_SYMBOL(phy_write_mmd); /** - * __phy_modify_changed() - Convenience function for modifying a PHY register - * @phydev: a pointer to a &struct phy_device - * @regnum: register number - * @mask: bit mask of bits to clear - * @set: bit mask of bits to set - * - * Unlocked helper function which allows a PHY register to be modified as - * new register value = (old register value & ~mask) | set - * - * Returns negative errno, 0 if there was no change, and 1 in case of change - */ -int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, - u16 set) -{ - int new, ret; - - ret = __phy_read(phydev, regnum); - if (ret < 0) - return ret; - - new = (ret & ~mask) | set; - if (new == ret) - return 0; - - ret = __phy_write(phydev, regnum, new); - - return ret < 0 ? ret : 1; -} -EXPORT_SYMBOL_GPL(__phy_modify_changed); - -/** * phy_modify_changed - Function for modifying a PHY register * @phydev: the phy_device struct * @regnum: register number to modify diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d76e038cf2cb..72c69a9c8a98 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -96,9 +96,10 @@ void phy_print_status(struct phy_device *phydev) { if (phydev->link) { netdev_info(phydev->attached_dev, - "Link is Up - %s/%s - flow control %s\n", + "Link is Up - %s/%s %s- flow control %s\n", phy_speed_to_str(phydev->speed), phy_duplex_to_str(phydev->duplex), + phydev->downshifted_rate ? "(downshifted) " : "", phy_pause_str(phydev)); } else { netdev_info(phydev->attached_dev, "Link is Down\n"); @@ -507,6 +508,7 @@ static int phy_check_link_status(struct phy_device *phydev) return err; if (phydev->link && phydev->state != PHY_RUNNING) { + phy_check_downshift(phydev); phydev->state = PHY_RUNNING; phy_link_up(phydev); } else if (!phydev->link && phydev->state != PHY_NOLINK) { @@ -715,25 +717,24 @@ static int phy_disable_interrupts(struct phy_device *phydev) static irqreturn_t phy_interrupt(int irq, void *phy_dat) { struct phy_device *phydev = phy_dat; + struct phy_driver *drv = phydev->drv; - if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev)) + if (drv->handle_interrupt) + return drv->handle_interrupt(phydev); + + if (drv->did_interrupt && !drv->did_interrupt(phydev)) return IRQ_NONE; - if (phydev->drv->handle_interrupt) { - if (phydev->drv->handle_interrupt(phydev)) - goto phy_err; - } else { - /* reschedule state queue work to run as soon as possible */ - phy_trigger_machine(phydev); + /* reschedule state queue work to run as soon as possible */ + phy_trigger_machine(phydev); + + /* did_interrupt() may have cleared the interrupt already */ + if (!drv->did_interrupt && phy_clear_interrupt(phydev)) { + phy_error(phydev); + return IRQ_NONE; } - if (phy_clear_interrupt(phydev)) - goto phy_err; return IRQ_HANDLED; - -phy_err: - phy_error(phydev); - return IRQ_NONE; } /** diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7c00b029ebfb..3b8f6b0b47b5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -286,6 +286,8 @@ static int mdio_bus_phy_suspend(struct device *dev) if (!mdio_bus_phy_may_suspend(phydev)) return 0; + phydev->suspended_by_mdio_bus = 1; + return phy_suspend(phydev); } @@ -294,9 +296,11 @@ static int mdio_bus_phy_resume(struct device *dev) struct phy_device *phydev = to_phy_device(dev); int ret; - if (!mdio_bus_phy_may_suspend(phydev)) + if (!phydev->suspended_by_mdio_bus) goto no_resume; + phydev->suspended_by_mdio_bus = 0; + ret = phy_resume(phydev); if (ret < 0) return ret; @@ -1055,18 +1059,12 @@ EXPORT_SYMBOL(phy_disconnect); static int phy_poll_reset(struct phy_device *phydev) { /* Poll until the reset bit clears (50ms per retry == 0.6 sec) */ - unsigned int retries = 12; - int ret; - - do { - msleep(50); - ret = phy_read(phydev, MII_BMCR); - if (ret < 0) - return ret; - } while (ret & BMCR_RESET && --retries); - if (ret & BMCR_RESET) - return -ETIMEDOUT; + int ret, val; + ret = phy_read_poll_timeout(phydev, MII_BMCR, val, !(val & BMCR_RESET), + 50000, 600000, true); + if (ret) + return ret; /* Some chips (smsc911x) may still need up to another 1ms after the * BMCR_RESET bit is cleared before they are usable. */ diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 19db68d74cb4..fed0c5907c6a 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -326,6 +326,33 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode) phylink_set(pl->supported, 10000baseER_Full); break; + case PHY_INTERFACE_MODE_XLGMII: + phylink_set(pl->supported, 25000baseCR_Full); + phylink_set(pl->supported, 25000baseKR_Full); + phylink_set(pl->supported, 25000baseSR_Full); + phylink_set(pl->supported, 40000baseKR4_Full); + phylink_set(pl->supported, 40000baseCR4_Full); + phylink_set(pl->supported, 40000baseSR4_Full); + phylink_set(pl->supported, 40000baseLR4_Full); + phylink_set(pl->supported, 50000baseCR2_Full); + phylink_set(pl->supported, 50000baseKR2_Full); + phylink_set(pl->supported, 50000baseSR2_Full); + phylink_set(pl->supported, 50000baseKR_Full); + phylink_set(pl->supported, 50000baseSR_Full); + phylink_set(pl->supported, 50000baseCR_Full); + phylink_set(pl->supported, 50000baseLR_ER_FR_Full); + phylink_set(pl->supported, 50000baseDR_Full); + phylink_set(pl->supported, 100000baseKR4_Full); + phylink_set(pl->supported, 100000baseSR4_Full); + phylink_set(pl->supported, 100000baseCR4_Full); + phylink_set(pl->supported, 100000baseLR4_ER4_Full); + phylink_set(pl->supported, 100000baseKR2_Full); + phylink_set(pl->supported, 100000baseSR2_Full); + phylink_set(pl->supported, 100000baseCR2_Full); + phylink_set(pl->supported, 100000baseLR2_ER2_FR2_Full); + phylink_set(pl->supported, 100000baseDR2_Full); + break; + default: phylink_err(pl, "incorrect link mode %s for in-band status\n", @@ -802,8 +829,14 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, config.interface = interface; ret = phylink_validate(pl, supported, &config); - if (ret) + if (ret) { + phylink_warn(pl, "validation of %s with support %*pb and advertisement %*pb failed: %d\n", + phy_modes(config.interface), + __ETHTOOL_LINK_MODE_MASK_NBITS, phy->supported, + __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising, + ret); return ret; + } phy->phylink = pl; phy->phy_link_change = phylink_phy_change; @@ -2035,4 +2068,240 @@ void phylink_helper_basex_speed(struct phylink_link_state *state) } EXPORT_SYMBOL_GPL(phylink_helper_basex_speed); +static void phylink_decode_c37_word(struct phylink_link_state *state, + uint16_t config_reg, int speed) +{ + bool tx_pause, rx_pause; + int fd_bit; + + if (speed == SPEED_2500) + fd_bit = ETHTOOL_LINK_MODE_2500baseX_Full_BIT; + else + fd_bit = ETHTOOL_LINK_MODE_1000baseX_Full_BIT; + + mii_lpa_mod_linkmode_x(state->lp_advertising, config_reg, fd_bit); + + if (linkmode_test_bit(fd_bit, state->advertising) && + linkmode_test_bit(fd_bit, state->lp_advertising)) { + state->speed = speed; + state->duplex = DUPLEX_FULL; + } else { + /* negotiation failure */ + state->link = false; + } + + linkmode_resolve_pause(state->advertising, state->lp_advertising, + &tx_pause, &rx_pause); + + if (tx_pause) + state->pause |= MLO_PAUSE_TX; + if (rx_pause) + state->pause |= MLO_PAUSE_RX; +} + +static void phylink_decode_sgmii_word(struct phylink_link_state *state, + uint16_t config_reg) +{ + if (!(config_reg & LPA_SGMII_LINK)) { + state->link = false; + return; + } + + switch (config_reg & LPA_SGMII_SPD_MASK) { + case LPA_SGMII_10: + state->speed = SPEED_10; + break; + case LPA_SGMII_100: + state->speed = SPEED_100; + break; + case LPA_SGMII_1000: + state->speed = SPEED_1000; + break; + default: + state->link = false; + return; + } + if (config_reg & LPA_SGMII_FULL_DUPLEX) + state->duplex = DUPLEX_FULL; + else + state->duplex = DUPLEX_HALF; +} + +/** + * phylink_mii_c22_pcs_get_state() - read the MAC PCS state + * @pcs: a pointer to a &struct mdio_device. + * @state: a pointer to a &struct phylink_link_state. + * + * Helper for MAC PCS supporting the 802.3 clause 22 register set for + * clause 37 negotiation and/or SGMII control. + * + * Read the MAC PCS state from the MII device configured in @config and + * parse the Clause 37 or Cisco SGMII link partner negotiation word into + * the phylink @state structure. This is suitable to be directly plugged + * into the mac_pcs_get_state() member of the struct phylink_mac_ops + * structure. + */ +void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, + struct phylink_link_state *state) +{ + struct mii_bus *bus = pcs->bus; + int addr = pcs->addr; + int bmsr, lpa; + + bmsr = mdiobus_read(bus, addr, MII_BMSR); + lpa = mdiobus_read(bus, addr, MII_LPA); + if (bmsr < 0 || lpa < 0) { + state->link = false; + return; + } + + state->link = !!(bmsr & BMSR_LSTATUS); + state->an_complete = !!(bmsr & BMSR_ANEGCOMPLETE); + if (!state->link) + return; + + switch (state->interface) { + case PHY_INTERFACE_MODE_1000BASEX: + phylink_decode_c37_word(state, lpa, SPEED_1000); + break; + + case PHY_INTERFACE_MODE_2500BASEX: + phylink_decode_c37_word(state, lpa, SPEED_2500); + break; + + case PHY_INTERFACE_MODE_SGMII: + phylink_decode_sgmii_word(state, lpa); + break; + + default: + state->link = false; + break; + } +} +EXPORT_SYMBOL_GPL(phylink_mii_c22_pcs_get_state); + +/** + * phylink_mii_c22_pcs_set_advertisement() - configure the clause 37 PCS + * advertisement + * @pcs: a pointer to a &struct mdio_device. + * @state: a pointer to the state being configured. + * + * Helper for MAC PCS supporting the 802.3 clause 22 register set for + * clause 37 negotiation and/or SGMII control. + * + * Configure the clause 37 PCS advertisement as specified by @state. This + * does not trigger a renegotiation; phylink will do that via the + * mac_an_restart() method of the struct phylink_mac_ops structure. + * + * Returns negative error code on failure to configure the advertisement, + * zero if no change has been made, or one if the advertisement has changed. + */ +int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, + const struct phylink_link_state *state) +{ + struct mii_bus *bus = pcs->bus; + int addr = pcs->addr; + int val, ret; + u16 adv; + + switch (state->interface) { + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + adv = ADVERTISE_1000XFULL; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + state->advertising)) + adv |= ADVERTISE_1000XPAUSE; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + state->advertising)) + adv |= ADVERTISE_1000XPSE_ASYM; + + val = mdiobus_read(bus, addr, MII_ADVERTISE); + if (val < 0) + return val; + + if (val == adv) + return 0; + + ret = mdiobus_write(bus, addr, MII_ADVERTISE, adv); + if (ret < 0) + return ret; + + return 1; + + case PHY_INTERFACE_MODE_SGMII: + val = mdiobus_read(bus, addr, MII_ADVERTISE); + if (val < 0) + return val; + + if (val == 0x0001) + return 0; + + ret = mdiobus_write(bus, addr, MII_ADVERTISE, 0x0001); + if (ret < 0) + return ret; + + return 1; + + default: + /* Nothing to do for other modes */ + return 0; + } +} +EXPORT_SYMBOL_GPL(phylink_mii_c22_pcs_set_advertisement); + +/** + * phylink_mii_c22_pcs_an_restart() - restart 802.3z autonegotiation + * @pcs: a pointer to a &struct mdio_device. + * + * Helper for MAC PCS supporting the 802.3 clause 22 register set for + * clause 37 negotiation. + * + * Restart the clause 37 negotiation with the link partner. This is + * suitable to be directly plugged into the mac_pcs_get_state() member + * of the struct phylink_mac_ops structure. + */ +void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs) +{ + struct mii_bus *bus = pcs->bus; + int val, addr = pcs->addr; + + val = mdiobus_read(bus, addr, MII_BMCR); + if (val >= 0) { + val |= BMCR_ANRESTART; + + mdiobus_write(bus, addr, MII_BMCR, val); + } +} +EXPORT_SYMBOL_GPL(phylink_mii_c22_pcs_an_restart); + +#define C45_ADDR(d,a) (MII_ADDR_C45 | (d) << 16 | (a)) +void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs, + struct phylink_link_state *state) +{ + struct mii_bus *bus = pcs->bus; + int addr = pcs->addr; + int stat; + + stat = mdiobus_read(bus, addr, C45_ADDR(MDIO_MMD_PCS, MDIO_STAT1)); + if (stat < 0) { + state->link = false; + return; + } + + state->link = !!(stat & MDIO_STAT1_LSTATUS); + if (!state->link) + return; + + switch (state->interface) { + case PHY_INTERFACE_MODE_10GBASER: + state->speed = SPEED_10000; + state->duplex = DUPLEX_FULL; + break; + + default: + break; + } +} +EXPORT_SYMBOL_GPL(phylink_mii_c45_pcs_get_state); + MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index f5fa2fff3ddc..2d99e9de6ee1 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -49,6 +49,8 @@ #define RTL_LPADV_5000FULL BIT(6) #define RTL_LPADV_2500FULL BIT(5) +#define RTLGEN_SPEED_MASK 0x0630 + #define RTL_GENERIC_PHYID 0x001cc800 MODULE_DESCRIPTION("Realtek PHY driver"); @@ -309,6 +311,55 @@ static int rtl8366rb_config_init(struct phy_device *phydev) return ret; } +/* get actual speed to cover the downshift case */ +static int rtlgen_get_speed(struct phy_device *phydev) +{ + int val; + + if (!phydev->link) + return 0; + + val = phy_read_paged(phydev, 0xa43, 0x12); + if (val < 0) + return val; + + switch (val & RTLGEN_SPEED_MASK) { + case 0x0000: + phydev->speed = SPEED_10; + break; + case 0x0010: + phydev->speed = SPEED_100; + break; + case 0x0020: + phydev->speed = SPEED_1000; + break; + case 0x0200: + phydev->speed = SPEED_10000; + break; + case 0x0210: + phydev->speed = SPEED_2500; + break; + case 0x0220: + phydev->speed = SPEED_5000; + break; + default: + break; + } + + return 0; +} + +static int rtlgen_read_status(struct phy_device *phydev) +{ + int ret; + + ret = genphy_read_status(phydev); + if (ret < 0) + return ret; + + return rtlgen_get_speed(phydev); +} + static int rtlgen_read_mmd(struct phy_device *phydev, int devnum, u16 regnum) { int ret; @@ -429,6 +480,8 @@ static int rtl8125_config_aneg(struct phy_device *phydev) static int rtl8125_read_status(struct phy_device *phydev) { + int ret; + if (phydev->autoneg == AUTONEG_ENABLE) { int lpadv = phy_read_paged(phydev, 0xa5d, 0x13); @@ -443,7 +496,11 @@ static int rtl8125_read_status(struct phy_device *phydev) phydev->lp_advertising, lpadv & RTL_LPADV_2500FULL); } - return genphy_read_status(phydev); + ret = genphy_read_status(phydev); + if (ret < 0) + return ret; + + return rtlgen_get_speed(phydev); } static bool rtlgen_supports_2_5gbps(struct phy_device *phydev) @@ -550,6 +607,7 @@ static struct phy_driver realtek_drvs[] = { }, { .name = "Generic FE-GE Realtek PHY", .match_phy_device = rtlgen_match_phy_device, + .read_status = rtlgen_read_status, .suspend = genphy_suspend, .resume = genphy_resume, .read_page = rtl821x_read_page, diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index b73298250793..93da7d3d0954 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -112,8 +112,6 @@ static int lan87xx_read_status(struct phy_device *phydev) int err = genphy_read_status(phydev); if (!phydev->link && priv->energy_enable) { - int i; - /* Disable EDPD to wake up PHY */ int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); if (rc < 0) @@ -125,15 +123,11 @@ static int lan87xx_read_status(struct phy_device *phydev) return rc; /* Wait max 640 ms to detect energy */ - for (i = 0; i < 64; i++) { - /* Sleep to allow link test pulses to be sent */ - msleep(10); - rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); - if (rc < 0) - return rc; - if (rc & MII_LAN83C185_ENERGYON) - break; - } + phy_read_poll_timeout(phydev, MII_LAN83C185_CTRL_STATUS, rc, + rc & MII_LAN83C185_ENERGYON, 10000, + 640000, true); + if (rc < 0) + return rc; /* Re-enable EDPD */ rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index 58a69f830d29..f78ceba42e57 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -232,7 +232,7 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, struct cstate *cs = lcs->next; unsigned long deltaS, deltaA; short changes = 0; - int hlen; + int nlen, hlen; unsigned char new_seq[16]; unsigned char *cp = new_seq; struct iphdr *ip; @@ -248,6 +248,8 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, return isize; ip = (struct iphdr *) icp; + if (ip->version != 4 || ip->ihl < 5) + return isize; /* Bail if this packet isn't TCP, or is an IP fragment */ if (ip->protocol != IPPROTO_TCP || (ntohs(ip->frag_off) & 0x3fff)) { @@ -258,10 +260,14 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, comp->sls_o_tcp++; return isize; } - /* Extract TCP header */ + nlen = ip->ihl * 4; + if (isize < nlen + sizeof(*th)) + return isize; - th = (struct tcphdr *)(((unsigned char *)ip) + ip->ihl*4); - hlen = ip->ihl*4 + th->doff*4; + th = (struct tcphdr *)(icp + nlen); + if (th->doff < sizeof(struct tcphdr) / 4) + return isize; + hlen = nlen + th->doff * 4; /* Bail if the TCP packet isn't `compressible' (i.e., ACK isn't set or * some other control bit is set). Also uncompressible if diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index ca70a1d840eb..4004f98e50d9 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2240,6 +2240,8 @@ team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = { [TEAM_ATTR_OPTION_CHANGED] = { .type = NLA_FLAG }, [TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 }, [TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY }, + [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32 }, + [TEAM_ATTR_OPTION_ARRAY_INDEX] = { .type = NLA_U32 }, }; static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 5569077bd5b8..8929669b5e6d 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1172,7 +1172,7 @@ static struct usb_cdc_ncm_ndp32 *cdc_ncm_ndp32(struct cdc_ncm_ctx *ctx, struct s ndp32 = ctx->delayed_ndp32; ndp32->dwSignature = sign; - ndp32->wLength = cpu_to_le32(sizeof(struct usb_cdc_ncm_ndp32) + sizeof(struct usb_cdc_ncm_dpe32)); + ndp32->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp32) + sizeof(struct usb_cdc_ncm_dpe32)); return ndp32; } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f27fdd6ab86f..8f8d9883d363 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3199,6 +3199,8 @@ static u16 r8153_phy_status(struct r8152 *tp, u16 desired) } msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } return data; @@ -5380,7 +5382,10 @@ static void r8153_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); @@ -5517,7 +5522,10 @@ static void r8153b_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 8cdc4415fa70..b6505a6c7102 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -34,16 +34,23 @@ #define VETH_RING_SIZE 256 #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) -/* Separating two types of XDP xmit */ -#define VETH_XDP_TX BIT(0) -#define VETH_XDP_REDIR BIT(1) - #define VETH_XDP_TX_BULK_SIZE 16 +struct veth_stats { + u64 rx_drops; + /* xdp */ + u64 xdp_packets; + u64 xdp_bytes; + u64 xdp_redirect; + u64 xdp_drops; + u64 xdp_tx; + u64 xdp_tx_err; + u64 xdp_xmit; + u64 xdp_xmit_err; +}; + struct veth_rq_stats { - u64 xdp_packets; - u64 xdp_bytes; - u64 xdp_drops; + struct veth_stats vs; struct u64_stats_sync syncp; }; @@ -80,12 +87,18 @@ struct veth_q_stat_desc { size_t offset; }; -#define VETH_RQ_STAT(m) offsetof(struct veth_rq_stats, m) +#define VETH_RQ_STAT(m) offsetof(struct veth_stats, m) static const struct veth_q_stat_desc veth_rq_stats_desc[] = { { "xdp_packets", VETH_RQ_STAT(xdp_packets) }, { "xdp_bytes", VETH_RQ_STAT(xdp_bytes) }, - { "xdp_drops", VETH_RQ_STAT(xdp_drops) }, + { "rx_drops", VETH_RQ_STAT(rx_drops) }, + { "rx_xdp_redirect", VETH_RQ_STAT(xdp_redirect) }, + { "rx_xdp_drops", VETH_RQ_STAT(xdp_drops) }, + { "rx_xdp_tx", VETH_RQ_STAT(xdp_tx) }, + { "rx_xdp_tx_errors", VETH_RQ_STAT(xdp_tx_err) }, + { "tx_xdp_xmit", VETH_RQ_STAT(xdp_xmit) }, + { "tx_xdp_xmit_errors", VETH_RQ_STAT(xdp_xmit_err) }, }; #define VETH_RQ_STATS_LEN ARRAY_SIZE(veth_rq_stats_desc) @@ -124,7 +137,7 @@ static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) for (i = 0; i < dev->real_num_rx_queues; i++) { for (j = 0; j < VETH_RQ_STATS_LEN; j++) { snprintf(p, ETH_GSTRING_LEN, - "rx_queue_%u_%.11s", + "rx_queue_%u_%.18s", i, veth_rq_stats_desc[j].desc); p += ETH_GSTRING_LEN; } @@ -155,7 +168,7 @@ static void veth_get_ethtool_stats(struct net_device *dev, idx = 1; for (i = 0; i < dev->real_num_rx_queues; i++) { const struct veth_rq_stats *rq_stats = &priv->rq[i].stats; - const void *stats_base = (void *)rq_stats; + const void *stats_base = (void *)&rq_stats->vs; unsigned int start; size_t offset; @@ -283,28 +296,34 @@ static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) return atomic64_read(&priv->dropped); } -static void veth_stats_rx(struct veth_rq_stats *result, struct net_device *dev) +static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); int i; + result->xdp_xmit_err = 0; result->xdp_packets = 0; + result->xdp_tx_err = 0; result->xdp_bytes = 0; - result->xdp_drops = 0; + result->rx_drops = 0; for (i = 0; i < dev->num_rx_queues; i++) { + u64 packets, bytes, drops, xdp_tx_err, xdp_xmit_err; struct veth_rq_stats *stats = &priv->rq[i].stats; - u64 packets, bytes, drops; unsigned int start; do { start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->xdp_packets; - bytes = stats->xdp_bytes; - drops = stats->xdp_drops; + xdp_xmit_err = stats->vs.xdp_xmit_err; + xdp_tx_err = stats->vs.xdp_tx_err; + packets = stats->vs.xdp_packets; + bytes = stats->vs.xdp_bytes; + drops = stats->vs.rx_drops; } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + result->xdp_xmit_err += xdp_xmit_err; + result->xdp_tx_err += xdp_tx_err; result->xdp_packets += packets; result->xdp_bytes += bytes; - result->xdp_drops += drops; + result->rx_drops += drops; } } @@ -313,7 +332,7 @@ static void veth_get_stats64(struct net_device *dev, { struct veth_priv *priv = netdev_priv(dev); struct net_device *peer; - struct veth_rq_stats rx; + struct veth_stats rx; u64 packets, bytes; tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); @@ -321,18 +340,20 @@ static void veth_get_stats64(struct net_device *dev, tot->tx_packets = packets; veth_stats_rx(&rx, dev); - tot->rx_dropped = rx.xdp_drops; + tot->tx_dropped += rx.xdp_xmit_err + rx.xdp_tx_err; + tot->rx_dropped = rx.rx_drops; tot->rx_bytes = rx.xdp_bytes; tot->rx_packets = rx.xdp_packets; rcu_read_lock(); peer = rcu_dereference(priv->peer); if (peer) { - tot->rx_dropped += veth_stats_tx(peer, &packets, &bytes); + veth_stats_tx(peer, &packets, &bytes); tot->rx_bytes += bytes; tot->rx_packets += packets; veth_stats_rx(&rx, peer); + tot->rx_dropped += rx.xdp_xmit_err + rx.xdp_tx_err; tot->tx_bytes += rx.xdp_bytes; tot->tx_packets += rx.xdp_packets; } @@ -369,28 +390,32 @@ static int veth_select_rxq(struct net_device *dev) } static int veth_xdp_xmit(struct net_device *dev, int n, - struct xdp_frame **frames, u32 flags) + struct xdp_frame **frames, + u32 flags, bool ndo_xmit) { struct veth_priv *rcv_priv, *priv = netdev_priv(dev); + unsigned int qidx, max_len; struct net_device *rcv; int i, ret, drops = n; - unsigned int max_len; struct veth_rq *rq; rcu_read_lock(); if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { - ret = -EINVAL; - goto drop; + rcu_read_unlock(); + atomic64_add(drops, &priv->dropped); + return -EINVAL; } rcv = rcu_dereference(priv->peer); if (unlikely(!rcv)) { - ret = -ENXIO; - goto drop; + rcu_read_unlock(); + atomic64_add(drops, &priv->dropped); + return -ENXIO; } rcv_priv = netdev_priv(rcv); - rq = &rcv_priv->rq[veth_select_rxq(rcv)]; + qidx = veth_select_rxq(rcv); + rq = &rcv_priv->rq[qidx]; /* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive * side. This means an XDP program is loaded on the peer and the peer * device is up. @@ -419,24 +444,35 @@ static int veth_xdp_xmit(struct net_device *dev, int n, if (flags & XDP_XMIT_FLUSH) __veth_xdp_flush(rq); - if (likely(!drops)) { - rcu_read_unlock(); - return n; - } - ret = n - drops; drop: + rq = &priv->rq[qidx]; + u64_stats_update_begin(&rq->stats.syncp); + if (ndo_xmit) { + rq->stats.vs.xdp_xmit += n - drops; + rq->stats.vs.xdp_xmit_err += drops; + } else { + rq->stats.vs.xdp_tx += n - drops; + rq->stats.vs.xdp_tx_err += drops; + } + u64_stats_update_end(&rq->stats.syncp); + rcu_read_unlock(); - atomic64_add(drops, &priv->dropped); return ret; } +static int veth_ndo_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + return veth_xdp_xmit(dev, n, frames, flags, true); +} + static void veth_xdp_flush_bq(struct net_device *dev, struct veth_xdp_tx_bq *bq) { int sent, i, err = 0; - sent = veth_xdp_xmit(dev, bq->count, bq->q, 0); + sent = veth_xdp_xmit(dev, bq->count, bq->q, 0, false); if (sent < 0) { err = sent; sent = 0; @@ -489,8 +525,8 @@ static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp, static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, struct xdp_frame *frame, - unsigned int *xdp_xmit, - struct veth_xdp_tx_bq *bq) + struct veth_xdp_tx_bq *bq, + struct veth_stats *stats) { void *hard_start = frame->data - frame->headroom; void *head = hard_start - sizeof(struct xdp_frame); @@ -526,9 +562,10 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) { trace_xdp_exception(rq->dev, xdp_prog, act); frame = &orig_frame; + stats->rx_drops++; goto err_xdp; } - *xdp_xmit |= VETH_XDP_TX; + stats->xdp_tx++; rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: @@ -537,9 +574,10 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, xdp.rxq->mem = frame->mem; if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) { frame = &orig_frame; + stats->rx_drops++; goto err_xdp; } - *xdp_xmit |= VETH_XDP_REDIR; + stats->xdp_redirect++; rcu_read_unlock(); goto xdp_xmit; default: @@ -549,6 +587,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, trace_xdp_exception(rq->dev, xdp_prog, act); /* fall through */ case XDP_DROP: + stats->xdp_drops++; goto err_xdp; } } @@ -558,6 +597,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, skb = veth_build_skb(head, headroom, len, 0); if (!skb) { xdp_return_frame(frame); + stats->rx_drops++; goto err; } @@ -573,9 +613,10 @@ xdp_xmit: return NULL; } -static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, - unsigned int *xdp_xmit, - struct veth_xdp_tx_bq *bq) +static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, + struct sk_buff *skb, + struct veth_xdp_tx_bq *bq, + struct veth_stats *stats) { u32 pktlen, headroom, act, metalen; void *orig_data, *orig_data_end; @@ -653,18 +694,21 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, xdp.rxq->mem = rq->xdp_mem; if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) { trace_xdp_exception(rq->dev, xdp_prog, act); + stats->rx_drops++; goto err_xdp; } - *xdp_xmit |= VETH_XDP_TX; + stats->xdp_tx++; rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: get_page(virt_to_page(xdp.data)); consume_skb(skb); xdp.rxq->mem = rq->xdp_mem; - if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) + if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) { + stats->rx_drops++; goto err_xdp; - *xdp_xmit |= VETH_XDP_REDIR; + } + stats->xdp_redirect++; rcu_read_unlock(); goto xdp_xmit; default: @@ -674,7 +718,8 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, trace_xdp_exception(rq->dev, xdp_prog, act); /* fall through */ case XDP_DROP: - goto drop; + stats->xdp_drops++; + goto xdp_drop; } rcu_read_unlock(); @@ -696,6 +741,8 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, out: return skb; drop: + stats->rx_drops++; +xdp_drop: rcu_read_unlock(); kfree_skb(skb); return NULL; @@ -706,14 +753,14 @@ xdp_xmit: return NULL; } -static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit, - struct veth_xdp_tx_bq *bq) +static int veth_xdp_rcv(struct veth_rq *rq, int budget, + struct veth_xdp_tx_bq *bq, + struct veth_stats *stats) { - int i, done = 0, drops = 0, bytes = 0; + int i, done = 0; for (i = 0; i < budget; i++) { void *ptr = __ptr_ring_consume(&rq->xdp_ring); - unsigned int xdp_xmit_one = 0; struct sk_buff *skb; if (!ptr) @@ -722,27 +769,26 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit, if (veth_is_xdp_frame(ptr)) { struct xdp_frame *frame = veth_ptr_to_xdp(ptr); - bytes += frame->len; - skb = veth_xdp_rcv_one(rq, frame, &xdp_xmit_one, bq); + stats->xdp_bytes += frame->len; + skb = veth_xdp_rcv_one(rq, frame, bq, stats); } else { skb = ptr; - bytes += skb->len; - skb = veth_xdp_rcv_skb(rq, skb, &xdp_xmit_one, bq); + stats->xdp_bytes += skb->len; + skb = veth_xdp_rcv_skb(rq, skb, bq, stats); } - *xdp_xmit |= xdp_xmit_one; if (skb) napi_gro_receive(&rq->xdp_napi, skb); - else if (!xdp_xmit_one) - drops++; done++; } u64_stats_update_begin(&rq->stats.syncp); - rq->stats.xdp_packets += done; - rq->stats.xdp_bytes += bytes; - rq->stats.xdp_drops += drops; + rq->stats.vs.xdp_redirect += stats->xdp_redirect; + rq->stats.vs.xdp_bytes += stats->xdp_bytes; + rq->stats.vs.xdp_drops += stats->xdp_drops; + rq->stats.vs.rx_drops += stats->rx_drops; + rq->stats.vs.xdp_packets += done; u64_stats_update_end(&rq->stats.syncp); return done; @@ -752,14 +798,14 @@ static int veth_poll(struct napi_struct *napi, int budget) { struct veth_rq *rq = container_of(napi, struct veth_rq, xdp_napi); - unsigned int xdp_xmit = 0; + struct veth_stats stats = {}; struct veth_xdp_tx_bq bq; int done; bq.count = 0; xdp_set_return_frame_no_direct(); - done = veth_xdp_rcv(rq, budget, &xdp_xmit, &bq); + done = veth_xdp_rcv(rq, budget, &bq, &stats); if (done < budget && napi_complete_done(napi, done)) { /* Write rx_notify_masked before reading ptr_ring */ @@ -770,9 +816,9 @@ static int veth_poll(struct napi_struct *napi, int budget) } } - if (xdp_xmit & VETH_XDP_TX) + if (stats.xdp_tx > 0) veth_xdp_flush(rq->dev, &bq); - if (xdp_xmit & VETH_XDP_REDIR) + if (stats.xdp_redirect > 0) xdp_do_flush(); xdp_clear_return_frame_no_direct(); @@ -1158,7 +1204,7 @@ static const struct net_device_ops veth_netdev_ops = { .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = veth_set_rx_headroom, .ndo_bpf = veth_xdp, - .ndo_xdp_xmit = veth_xdp_xmit, + .ndo_xdp_xmit = veth_ndo_xdp_xmit, }; #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index b8228f50bc94..66e00ddc0d42 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -504,7 +504,7 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) static int vrf_rt6_create(struct net_device *dev) { - int flags = DST_HOST | DST_NOPOLICY | DST_NOXFRM; + int flags = DST_NOPOLICY | DST_NOXFRM; struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); struct rt6_info *rt6; @@ -739,7 +739,7 @@ static int vrf_rtable_create(struct net_device *dev) return -ENOMEM; /* create a dst for routing packets out through a VRF device */ - rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1, 1, 0); + rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1, 1); if (!rth) return -ENOMEM; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 70b29bf16bb9..60296a754af2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -308,7 +308,8 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) } /* PHY_SKU section is mandatory in B0 */ - if (!mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { + if (mvm->trans->cfg->nvm_type == IWL_NVM_EXT && + !mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { IWL_ERR(mvm, "Can't parse phy_sku in B0, empty sections\n"); return NULL; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index da0a6b6c4771..7fe8207db6ae 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -4,7 +4,7 @@ * Copyright (c) 2008, Jouni Malinen <j@w1.fi> * Copyright (c) 2011, Javier Lopez <jlopex@gmail.com> * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2020 Intel Corporation */ /* @@ -33,6 +33,9 @@ #include <net/netns/generic.h> #include <linux/rhashtable.h> #include <linux/nospec.h> +#include <linux/virtio.h> +#include <linux/virtio_ids.h> +#include <linux/virtio_config.h> #include "mac80211_hwsim.h" #define WARN_QUEUE 100 @@ -613,14 +616,14 @@ static const struct genl_multicast_group hwsim_mcgrps[] = { /* MAC80211_HWSIM netlink policy */ static const struct nla_policy hwsim_genl_policy[HWSIM_ATTR_MAX + 1] = { - [HWSIM_ATTR_ADDR_RECEIVER] = { .type = NLA_UNSPEC, .len = ETH_ALEN }, - [HWSIM_ATTR_ADDR_TRANSMITTER] = { .type = NLA_UNSPEC, .len = ETH_ALEN }, + [HWSIM_ATTR_ADDR_RECEIVER] = NLA_POLICY_ETH_ADDR_COMPAT, + [HWSIM_ATTR_ADDR_TRANSMITTER] = NLA_POLICY_ETH_ADDR_COMPAT, [HWSIM_ATTR_FRAME] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [HWSIM_ATTR_FLAGS] = { .type = NLA_U32 }, [HWSIM_ATTR_RX_RATE] = { .type = NLA_U32 }, [HWSIM_ATTR_SIGNAL] = { .type = NLA_U32 }, - [HWSIM_ATTR_TX_INFO] = { .type = NLA_UNSPEC, + [HWSIM_ATTR_TX_INFO] = { .type = NLA_BINARY, .len = IEEE80211_TX_MAX_RATES * sizeof(struct hwsim_tx_rate)}, [HWSIM_ATTR_COOKIE] = { .type = NLA_U64 }, @@ -630,15 +633,61 @@ static const struct nla_policy hwsim_genl_policy[HWSIM_ATTR_MAX + 1] = { [HWSIM_ATTR_REG_CUSTOM_REG] = { .type = NLA_U32 }, [HWSIM_ATTR_REG_STRICT_REG] = { .type = NLA_FLAG }, [HWSIM_ATTR_SUPPORT_P2P_DEVICE] = { .type = NLA_FLAG }, + [HWSIM_ATTR_USE_CHANCTX] = { .type = NLA_FLAG }, [HWSIM_ATTR_DESTROY_RADIO_ON_CLOSE] = { .type = NLA_FLAG }, [HWSIM_ATTR_RADIO_NAME] = { .type = NLA_STRING }, [HWSIM_ATTR_NO_VIF] = { .type = NLA_FLAG }, [HWSIM_ATTR_FREQ] = { .type = NLA_U32 }, - [HWSIM_ATTR_PERM_ADDR] = { .type = NLA_UNSPEC, .len = ETH_ALEN }, + [HWSIM_ATTR_TX_INFO_FLAGS] = { .type = NLA_BINARY }, + [HWSIM_ATTR_PERM_ADDR] = NLA_POLICY_ETH_ADDR_COMPAT, [HWSIM_ATTR_IFTYPE_SUPPORT] = { .type = NLA_U32 }, [HWSIM_ATTR_CIPHER_SUPPORT] = { .type = NLA_BINARY }, }; +#if IS_REACHABLE(CONFIG_VIRTIO) + +/* MAC80211_HWSIM virtio queues */ +static struct virtqueue *hwsim_vqs[HWSIM_NUM_VQS]; +static bool hwsim_virtio_enabled; +static spinlock_t hwsim_virtio_lock; + +static void hwsim_virtio_rx_work(struct work_struct *work); +static DECLARE_WORK(hwsim_virtio_rx, hwsim_virtio_rx_work); + +static int hwsim_tx_virtio(struct mac80211_hwsim_data *data, + struct sk_buff *skb) +{ + struct scatterlist sg[1]; + unsigned long flags; + int err; + + spin_lock_irqsave(&hwsim_virtio_lock, flags); + if (!hwsim_virtio_enabled) { + err = -ENODEV; + goto out_free; + } + + sg_init_one(sg, skb->head, skb_end_offset(skb)); + err = virtqueue_add_outbuf(hwsim_vqs[HWSIM_VQ_TX], sg, 1, skb, + GFP_ATOMIC); + if (err) + goto out_free; + virtqueue_kick(hwsim_vqs[HWSIM_VQ_TX]); + spin_unlock_irqrestore(&hwsim_virtio_lock, flags); + return 0; + +out_free: + spin_unlock_irqrestore(&hwsim_virtio_lock, flags); + nlmsg_free(skb); + return err; +} +#else +/* cause a linker error if this ends up being needed */ +extern int hwsim_tx_virtio(struct mac80211_hwsim_data *data, + struct sk_buff *skb); +#define hwsim_virtio_enabled false +#endif + static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_channel *chan); @@ -1138,8 +1187,14 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, goto nla_put_failure; genlmsg_end(skb, msg_head); - if (hwsim_unicast_netgroup(data, skb, dst_portid)) - goto err_free_txskb; + + if (hwsim_virtio_enabled) { + if (hwsim_tx_virtio(data, skb)) + goto err_free_txskb; + } else { + if (hwsim_unicast_netgroup(data, skb, dst_portid)) + goto err_free_txskb; + } /* Enqueue the packet */ skb_queue_tail(&data->pending, my_skb); @@ -1441,7 +1496,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, /* wmediumd mode check */ _portid = READ_ONCE(data->wmediumd); - if (_portid) + if (_portid || hwsim_virtio_enabled) return mac80211_hwsim_tx_frame_nl(hw, skb, _portid); /* NO wmediumd detected, perfect medium simulation */ @@ -1547,7 +1602,7 @@ static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw, mac80211_hwsim_monitor_rx(hw, skb, chan); - if (_pid) + if (_pid || hwsim_virtio_enabled) return mac80211_hwsim_tx_frame_nl(hw, skb, _pid); mac80211_hwsim_tx_frame_no_nl(hw, skb, chan); @@ -3293,11 +3348,14 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, if (!data2) goto out; - if (hwsim_net_get_netgroup(genl_info_net(info)) != data2->netgroup) - goto out; + if (!hwsim_virtio_enabled) { + if (hwsim_net_get_netgroup(genl_info_net(info)) != + data2->netgroup) + goto out; - if (info->snd_portid != data2->wmediumd) - goto out; + if (info->snd_portid != data2->wmediumd) + goto out; + } /* look for the skb matching the cookie passed back from user */ skb_queue_walk_safe(&data2->pending, skb, tmp) { @@ -3387,11 +3445,14 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2, if (!data2) goto out; - if (hwsim_net_get_netgroup(genl_info_net(info)) != data2->netgroup) - goto out; + if (!hwsim_virtio_enabled) { + if (hwsim_net_get_netgroup(genl_info_net(info)) != + data2->netgroup) + goto out; - if (info->snd_portid != data2->wmediumd) - goto out; + if (info->snd_portid != data2->wmediumd) + goto out; + } /* check if radio is configured properly */ @@ -3932,6 +3993,229 @@ static void hwsim_exit_netlink(void) genl_unregister_family(&hwsim_genl_family); } +#if IS_REACHABLE(CONFIG_VIRTIO) +static void hwsim_virtio_tx_done(struct virtqueue *vq) +{ + unsigned int len; + struct sk_buff *skb; + unsigned long flags; + + spin_lock_irqsave(&hwsim_virtio_lock, flags); + while ((skb = virtqueue_get_buf(vq, &len))) + nlmsg_free(skb); + spin_unlock_irqrestore(&hwsim_virtio_lock, flags); +} + +static int hwsim_virtio_handle_cmd(struct sk_buff *skb) +{ + struct nlmsghdr *nlh; + struct genlmsghdr *gnlh; + struct nlattr *tb[HWSIM_ATTR_MAX + 1]; + struct genl_info info = {}; + int err; + + nlh = nlmsg_hdr(skb); + gnlh = nlmsg_data(nlh); + err = genlmsg_parse(nlh, &hwsim_genl_family, tb, HWSIM_ATTR_MAX, + hwsim_genl_policy, NULL); + if (err) { + pr_err_ratelimited("hwsim: genlmsg_parse returned %d\n", err); + return err; + } + + info.attrs = tb; + + switch (gnlh->cmd) { + case HWSIM_CMD_FRAME: + hwsim_cloned_frame_received_nl(skb, &info); + break; + case HWSIM_CMD_TX_INFO_FRAME: + hwsim_tx_info_frame_received_nl(skb, &info); + break; + default: + pr_err_ratelimited("hwsim: invalid cmd: %d\n", gnlh->cmd); + return -EPROTO; + } + return 0; +} + +static void hwsim_virtio_rx_work(struct work_struct *work) +{ + struct virtqueue *vq; + unsigned int len; + struct sk_buff *skb; + struct scatterlist sg[1]; + int err; + unsigned long flags; + + spin_lock_irqsave(&hwsim_virtio_lock, flags); + if (!hwsim_virtio_enabled) + goto out_unlock; + + skb = virtqueue_get_buf(hwsim_vqs[HWSIM_VQ_RX], &len); + if (!skb) + goto out_unlock; + spin_unlock_irqrestore(&hwsim_virtio_lock, flags); + + skb->data = skb->head; + skb_set_tail_pointer(skb, len); + hwsim_virtio_handle_cmd(skb); + + spin_lock_irqsave(&hwsim_virtio_lock, flags); + if (!hwsim_virtio_enabled) { + nlmsg_free(skb); + goto out_unlock; + } + vq = hwsim_vqs[HWSIM_VQ_RX]; + sg_init_one(sg, skb->head, skb_end_offset(skb)); + err = virtqueue_add_inbuf(vq, sg, 1, skb, GFP_KERNEL); + if (WARN(err, "virtqueue_add_inbuf returned %d\n", err)) + nlmsg_free(skb); + else + virtqueue_kick(vq); + schedule_work(&hwsim_virtio_rx); + +out_unlock: + spin_unlock_irqrestore(&hwsim_virtio_lock, flags); +} + +static void hwsim_virtio_rx_done(struct virtqueue *vq) +{ + schedule_work(&hwsim_virtio_rx); +} + +static int init_vqs(struct virtio_device *vdev) +{ + vq_callback_t *callbacks[HWSIM_NUM_VQS] = { + [HWSIM_VQ_TX] = hwsim_virtio_tx_done, + [HWSIM_VQ_RX] = hwsim_virtio_rx_done, + }; + const char *names[HWSIM_NUM_VQS] = { + [HWSIM_VQ_TX] = "tx", + [HWSIM_VQ_RX] = "rx", + }; + + return virtio_find_vqs(vdev, HWSIM_NUM_VQS, + hwsim_vqs, callbacks, names, NULL); +} + +static int fill_vq(struct virtqueue *vq) +{ + int i, err; + struct sk_buff *skb; + struct scatterlist sg[1]; + + for (i = 0; i < virtqueue_get_vring_size(vq); i++) { + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + sg_init_one(sg, skb->head, skb_end_offset(skb)); + err = virtqueue_add_inbuf(vq, sg, 1, skb, GFP_KERNEL); + if (err) { + nlmsg_free(skb); + return err; + } + } + virtqueue_kick(vq); + return 0; +} + +static void remove_vqs(struct virtio_device *vdev) +{ + int i; + + vdev->config->reset(vdev); + + for (i = 0; i < ARRAY_SIZE(hwsim_vqs); i++) { + struct virtqueue *vq = hwsim_vqs[i]; + struct sk_buff *skb; + + while ((skb = virtqueue_detach_unused_buf(vq))) + nlmsg_free(skb); + } + + vdev->config->del_vqs(vdev); +} + +static int hwsim_virtio_probe(struct virtio_device *vdev) +{ + int err; + unsigned long flags; + + spin_lock_irqsave(&hwsim_virtio_lock, flags); + if (hwsim_virtio_enabled) { + spin_unlock_irqrestore(&hwsim_virtio_lock, flags); + return -EEXIST; + } + spin_unlock_irqrestore(&hwsim_virtio_lock, flags); + + err = init_vqs(vdev); + if (err) + return err; + + err = fill_vq(hwsim_vqs[HWSIM_VQ_RX]); + if (err) + goto out_remove; + + spin_lock_irqsave(&hwsim_virtio_lock, flags); + hwsim_virtio_enabled = true; + spin_unlock_irqrestore(&hwsim_virtio_lock, flags); + + schedule_work(&hwsim_virtio_rx); + return 0; + +out_remove: + remove_vqs(vdev); + return err; +} + +static void hwsim_virtio_remove(struct virtio_device *vdev) +{ + hwsim_virtio_enabled = false; + + cancel_work_sync(&hwsim_virtio_rx); + + remove_vqs(vdev); +} + +/* MAC80211_HWSIM virtio device id table */ +static const struct virtio_device_id id_table[] = { + { VIRTIO_ID_MAC80211_HWSIM, VIRTIO_DEV_ANY_ID }, + { 0 } +}; +MODULE_DEVICE_TABLE(virtio, id_table); + +static struct virtio_driver virtio_hwsim = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = hwsim_virtio_probe, + .remove = hwsim_virtio_remove, +}; + +static int hwsim_register_virtio_driver(void) +{ + spin_lock_init(&hwsim_virtio_lock); + + return register_virtio_driver(&virtio_hwsim); +} + +static void hwsim_unregister_virtio_driver(void) +{ + unregister_virtio_driver(&virtio_hwsim); +} +#else +static inline int hwsim_register_virtio_driver(void) +{ + return 0; +} + +static inline void hwsim_unregister_virtio_driver(void) +{ +} +#endif + static int __init init_mac80211_hwsim(void) { int i, err; @@ -3960,10 +4244,14 @@ static int __init init_mac80211_hwsim(void) if (err) goto out_unregister_driver; + err = hwsim_register_virtio_driver(); + if (err) + goto out_exit_netlink; + hwsim_class = class_create(THIS_MODULE, "mac80211_hwsim"); if (IS_ERR(hwsim_class)) { err = PTR_ERR(hwsim_class); - goto out_exit_netlink; + goto out_exit_virtio; } for (i = 0; i < radios; i++) { @@ -4075,6 +4363,8 @@ out_free_mon: free_netdev(hwsim_mon); out_free_radios: mac80211_hwsim_free(); +out_exit_virtio: + hwsim_unregister_virtio_driver(); out_exit_netlink: hwsim_exit_netlink(); out_unregister_driver: @@ -4091,6 +4381,7 @@ static void __exit exit_mac80211_hwsim(void) { pr_debug("mac80211_hwsim: unregister radios\n"); + hwsim_unregister_virtio_driver(); hwsim_exit_netlink(); mac80211_hwsim_free(); diff --git a/drivers/net/wireless/mac80211_hwsim.h b/drivers/net/wireless/mac80211_hwsim.h index a85bc7c5c030..28ade92adcb4 100644 --- a/drivers/net/wireless/mac80211_hwsim.h +++ b/drivers/net/wireless/mac80211_hwsim.h @@ -3,6 +3,7 @@ * mac80211_hwsim - software simulator of 802.11 radio(s) for mac80211 * Copyright (c) 2008, Jouni Malinen <j@w1.fi> * Copyright (c) 2011, Javier Lopez <jlopex@gmail.com> + * Copyright (C) 2020 Intel Corporation */ #ifndef __MAC80211_HWSIM_H @@ -245,4 +246,24 @@ struct hwsim_tx_rate_flag { s8 idx; u16 flags; } __packed; + +/** + * DOC: Frame transmission support over virtio + * + * Frame transmission is also supported over virtio to allow communication + * with external entities. + */ + +/** + * enum hwsim_vqs - queues for virtio frame transmission + * + * @HWSIM_VQ_TX: send frames to external entity + * @HWSIM_VQ_RX: receive frames and transmission info reports + * @HWSIM_NUM_VQS: enum limit + */ +enum { + HWSIM_VQ_TX, + HWSIM_VQ_RX, + HWSIM_NUM_VQS, +}; #endif /* __MAC80211_HWSIM_H */ diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index e5dd7080e88e..75e659774e07 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -467,10 +467,13 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, struct page *page = virt_to_head_page(data); int offset = data - page_address(page); struct sk_buff *skb = q->rx_head; + struct skb_shared_info *shinfo = skb_shinfo(skb); - offset += q->buf_offset; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, len, - q->buf_size); + if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) { + offset += q->buf_offset; + skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len, + q->buf_size); + } if (more) return; diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index 01305ba2d3aa..c878097f0dda 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -436,10 +436,18 @@ static int virt_wifi_net_device_stop(struct net_device *dev) return 0; } +static int virt_wifi_net_device_get_iflink(const struct net_device *dev) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + + return priv->lowerdev->ifindex; +} + static const struct net_device_ops virt_wifi_ops = { .ndo_start_xmit = virt_wifi_start_xmit, - .ndo_open = virt_wifi_net_device_open, - .ndo_stop = virt_wifi_net_device_stop, + .ndo_open = virt_wifi_net_device_open, + .ndo_stop = virt_wifi_net_device_stop, + .ndo_get_iflink = virt_wifi_net_device_get_iflink, }; /* Invoked as part of rtnl lock release. */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ace4dd9e953c..d3f23d6254e4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1078,9 +1078,9 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx) spin_lock(&nvmeq->cq_poll_lock); found = nvme_process_cq(nvmeq, &start, &end, -1); + nvme_complete_cqes(nvmeq, start, end); spin_unlock(&nvmeq->cq_poll_lock); - nvme_complete_cqes(nvmeq, start, end); return found; } diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 8270bbf505fb..9f982c0627a0 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -306,6 +306,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) rc = of_mdiobus_register_phy(mdio, child, addr); if (rc && rc != -ENODEV) goto unregister; + break; } } } diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index d20aabc26273..3a10e678c7f4 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -670,7 +670,7 @@ static inline int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie, * outbound memory @ 3GB). So instead it will start at the 1x * multiple of its size */ - if (!*rc_bar2_size || *rc_bar2_offset % *rc_bar2_size || + if (!*rc_bar2_size || (*rc_bar2_offset & (*rc_bar2_size - 1)) || (*rc_bar2_offset < SZ_4G && *rc_bar2_offset > SZ_2G)) { dev_err(dev, "Invalid rc_bar2_offset/size: size 0x%llx, off 0x%llx\n", *rc_bar2_size, *rc_bar2_offset); diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index acce8781c456..f5c7a845cd7b 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -24,8 +24,6 @@ static int arm_pmu_acpi_register_irq(int cpu) int gsi, trigger; gicc = acpi_cpu_get_madt_gicc(cpu); - if (WARN_ON(!gicc)) - return -EINVAL; gsi = gicc->performance_interrupt; @@ -64,11 +62,10 @@ static void arm_pmu_acpi_unregister_irq(int cpu) int gsi; gicc = acpi_cpu_get_madt_gicc(cpu); - if (!gicc) - return; gsi = gicc->performance_interrupt; - acpi_unregister_gsi(gsi); + if (gsi) + acpi_unregister_gsi(gsi); } #if IS_ENABLED(CONFIG_ARM_SPE_PMU) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 95dca2cb5265..90884d14f95f 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -388,9 +388,10 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config, if (enable) { /* - * must disable first, then enable again - * otherwise, cycle counter will not work - * if previous state is enabled. + * cycle counter is special which should firstly write 0 then + * write 1 into CLEAR bit to clear it. Other counters only + * need write 0 into CLEAR bit and it turns out to be 1 by + * hardware. Below enable flow is harmless for all counters. */ writel(0, pmu->base + reg); val = CNTL_EN | CNTL_CLEAR; @@ -398,7 +399,8 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config, writel(val, pmu->base + reg); } else { /* Disable counter */ - writel(0, pmu->base + reg); + val = readl_relaxed(pmu->base + reg) & CNTL_EN_MASK; + writel(val, pmu->base + reg); } } diff --git a/drivers/phy/allwinner/phy-sun50i-usb3.c b/drivers/phy/allwinner/phy-sun50i-usb3.c index 1169f3e83a6f..b1c04f71a31d 100644 --- a/drivers/phy/allwinner/phy-sun50i-usb3.c +++ b/drivers/phy/allwinner/phy-sun50i-usb3.c @@ -49,7 +49,7 @@ #define SUNXI_LOS_BIAS(n) ((n) << 3) #define SUNXI_LOS_BIAS_MASK GENMASK(5, 3) #define SUNXI_TXVBOOSTLVL(n) ((n) << 0) -#define SUNXI_TXVBOOSTLVL_MASK GENMASK(0, 2) +#define SUNXI_TXVBOOSTLVL_MASK GENMASK(2, 0) struct sun50i_usb3_phy { struct phy *phy; diff --git a/drivers/phy/broadcom/phy-brcm-sata.c b/drivers/phy/broadcom/phy-brcm-sata.c index 4710cfcc3037..18251f232172 100644 --- a/drivers/phy/broadcom/phy-brcm-sata.c +++ b/drivers/phy/broadcom/phy-brcm-sata.c @@ -186,29 +186,6 @@ enum sata_phy_ctrl_regs { PHY_CTRL_1_RESET = BIT(0), }; -static inline void __iomem *brcm_sata_pcb_base(struct brcm_sata_port *port) -{ - struct brcm_sata_phy *priv = port->phy_priv; - u32 size = 0; - - switch (priv->version) { - case BRCM_SATA_PHY_STB_16NM: - case BRCM_SATA_PHY_STB_28NM: - case BRCM_SATA_PHY_IPROC_NS2: - case BRCM_SATA_PHY_DSL_28NM: - size = SATA_PCB_REG_28NM_SPACE_SIZE; - break; - case BRCM_SATA_PHY_STB_40NM: - size = SATA_PCB_REG_40NM_SPACE_SIZE; - break; - default: - dev_err(priv->dev, "invalid phy version\n"); - break; - } - - return priv->phy_base + (port->portnum * size); -} - static inline void __iomem *brcm_sata_ctrl_base(struct brcm_sata_port *port) { struct brcm_sata_phy *priv = port->phy_priv; @@ -226,19 +203,34 @@ static inline void __iomem *brcm_sata_ctrl_base(struct brcm_sata_port *port) return priv->ctrl_base + (port->portnum * size); } -static void brcm_sata_phy_wr(void __iomem *pcb_base, u32 bank, +static void brcm_sata_phy_wr(struct brcm_sata_port *port, u32 bank, u32 ofs, u32 msk, u32 value) { + struct brcm_sata_phy *priv = port->phy_priv; + void __iomem *pcb_base = priv->phy_base; u32 tmp; + if (priv->version == BRCM_SATA_PHY_STB_40NM) + bank += (port->portnum * SATA_PCB_REG_40NM_SPACE_SIZE); + else + pcb_base += (port->portnum * SATA_PCB_REG_28NM_SPACE_SIZE); + writel(bank, pcb_base + SATA_PCB_BANK_OFFSET); tmp = readl(pcb_base + SATA_PCB_REG_OFFSET(ofs)); tmp = (tmp & msk) | value; writel(tmp, pcb_base + SATA_PCB_REG_OFFSET(ofs)); } -static u32 brcm_sata_phy_rd(void __iomem *pcb_base, u32 bank, u32 ofs) +static u32 brcm_sata_phy_rd(struct brcm_sata_port *port, u32 bank, u32 ofs) { + struct brcm_sata_phy *priv = port->phy_priv; + void __iomem *pcb_base = priv->phy_base; + + if (priv->version == BRCM_SATA_PHY_STB_40NM) + bank += (port->portnum * SATA_PCB_REG_40NM_SPACE_SIZE); + else + pcb_base += (port->portnum * SATA_PCB_REG_28NM_SPACE_SIZE); + writel(bank, pcb_base + SATA_PCB_BANK_OFFSET); return readl(pcb_base + SATA_PCB_REG_OFFSET(ofs)); } @@ -250,16 +242,15 @@ static u32 brcm_sata_phy_rd(void __iomem *pcb_base, u32 bank, u32 ofs) static void brcm_stb_sata_ssc_init(struct brcm_sata_port *port) { - void __iomem *base = brcm_sata_pcb_base(port); struct brcm_sata_phy *priv = port->phy_priv; u32 tmp; /* override the TX spread spectrum setting */ tmp = TXPMD_CONTROL1_TX_SSC_EN_FRC_VAL | TXPMD_CONTROL1_TX_SSC_EN_FRC; - brcm_sata_phy_wr(base, TXPMD_REG_BANK, TXPMD_CONTROL1, ~tmp, tmp); + brcm_sata_phy_wr(port, TXPMD_REG_BANK, TXPMD_CONTROL1, ~tmp, tmp); /* set fixed min freq */ - brcm_sata_phy_wr(base, TXPMD_REG_BANK, TXPMD_TX_FREQ_CTRL_CONTROL2, + brcm_sata_phy_wr(port, TXPMD_REG_BANK, TXPMD_TX_FREQ_CTRL_CONTROL2, ~TXPMD_TX_FREQ_CTRL_CONTROL2_FMIN_MASK, STB_FMIN_VAL_DEFAULT); @@ -271,7 +262,7 @@ static void brcm_stb_sata_ssc_init(struct brcm_sata_port *port) tmp = STB_FMAX_VAL_DEFAULT; } - brcm_sata_phy_wr(base, TXPMD_REG_BANK, TXPMD_TX_FREQ_CTRL_CONTROL3, + brcm_sata_phy_wr(port, TXPMD_REG_BANK, TXPMD_TX_FREQ_CTRL_CONTROL3, ~TXPMD_TX_FREQ_CTRL_CONTROL3_FMAX_MASK, tmp); } @@ -280,7 +271,6 @@ static void brcm_stb_sata_ssc_init(struct brcm_sata_port *port) static int brcm_stb_sata_rxaeq_init(struct brcm_sata_port *port) { - void __iomem *base = brcm_sata_pcb_base(port); u32 tmp = 0, reg = 0; switch (port->rxaeq_mode) { @@ -301,8 +291,8 @@ static int brcm_stb_sata_rxaeq_init(struct brcm_sata_port *port) break; } - brcm_sata_phy_wr(base, AEQRX_REG_BANK_0, reg, ~tmp, tmp); - brcm_sata_phy_wr(base, AEQRX_REG_BANK_1, reg, ~tmp, tmp); + brcm_sata_phy_wr(port, AEQRX_REG_BANK_0, reg, ~tmp, tmp); + brcm_sata_phy_wr(port, AEQRX_REG_BANK_1, reg, ~tmp, tmp); return 0; } @@ -316,18 +306,17 @@ static int brcm_stb_sata_init(struct brcm_sata_port *port) static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port) { - void __iomem *base = brcm_sata_pcb_base(port); u32 tmp, value; /* Reduce CP tail current to 1/16th of its default value */ - brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL6, 0, 0x141); + brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL6, 0, 0x141); /* Turn off CP tail current boost */ - brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL8, 0, 0xc006); + brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL8, 0, 0xc006); /* Set a specific AEQ equalizer value */ tmp = AEQ_FRC_EQ_FORCE_VAL | AEQ_FRC_EQ_FORCE; - brcm_sata_phy_wr(base, AEQRX_REG_BANK_0, AEQ_FRC_EQ, + brcm_sata_phy_wr(port, AEQRX_REG_BANK_0, AEQ_FRC_EQ, ~(tmp | AEQ_RFZ_FRC_VAL | AEQ_FRC_EQ_VAL_MASK << AEQ_FRC_EQ_VAL_SHIFT), tmp | 32 << AEQ_FRC_EQ_VAL_SHIFT); @@ -337,7 +326,7 @@ static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port) value = 0x52; else value = 0; - brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_CDR_CONTROL1, + brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_CDR_CONTROL1, ~RXPMD_RX_PPM_VAL_MASK, value); /* Set proportional loop bandwith Gen1/2/3 */ @@ -352,7 +341,7 @@ static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port) value = 1 << RXPMD_G1_CDR_PROP_BW_SHIFT | 1 << RXPMD_G2_CDR_PROP_BW_SHIFT | 1 << RXPMD_G3_CDR_PROB_BW_SHIFT; - brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_PROP_BW, ~tmp, + brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_PROP_BW, ~tmp, value); /* Set CDR integral loop acquisition bandwidth for Gen1/2/3 */ @@ -365,7 +354,7 @@ static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port) 1 << RXPMD_G3_CDR_ACQ_INT_BW_SHIFT; else value = 0; - brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_ACQ_INTEG_BW, + brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_ACQ_INTEG_BW, ~tmp, value); /* Set CDR integral loop locking bandwidth to 1 for Gen 1/2/3 */ @@ -378,7 +367,7 @@ static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port) 1 << RXPMD_G3_CDR_LOCK_INT_BW_SHIFT; else value = 0; - brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_LOCK_INTEG_BW, + brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_LOCK_INTEG_BW, ~tmp, value); /* Set no guard band and clamp CDR */ @@ -387,11 +376,11 @@ static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port) value = 0x51; else value = 0; - brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_FREQ_MON_CONTROL1, + brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_FREQ_MON_CONTROL1, ~tmp, RXPMD_MON_CORRECT_EN | value); /* Turn on/off SSC */ - brcm_sata_phy_wr(base, TX_REG_BANK, TX_ACTRL5, ~TX_ACTRL5_SSC_EN, + brcm_sata_phy_wr(port, TX_REG_BANK, TX_ACTRL5, ~TX_ACTRL5_SSC_EN, port->ssc_en ? TX_ACTRL5_SSC_EN : 0); return 0; @@ -411,7 +400,6 @@ static int brcm_ns2_sata_init(struct brcm_sata_port *port) { int try; unsigned int val; - void __iomem *base = brcm_sata_pcb_base(port); void __iomem *ctrl_base = brcm_sata_ctrl_base(port); struct device *dev = port->phy_priv->dev; @@ -421,24 +409,24 @@ static int brcm_ns2_sata_init(struct brcm_sata_port *port) val |= (0x4 << OOB_CTRL1_BURST_MIN_SHIFT); val |= (0x9 << OOB_CTRL1_WAKE_IDLE_MAX_SHIFT); val |= (0x3 << OOB_CTRL1_WAKE_IDLE_MIN_SHIFT); - brcm_sata_phy_wr(base, OOB_REG_BANK, OOB_CTRL1, 0x0, val); + brcm_sata_phy_wr(port, OOB_REG_BANK, OOB_CTRL1, 0x0, val); val = 0x0; val |= (0x1b << OOB_CTRL2_RESET_IDLE_MAX_SHIFT); val |= (0x2 << OOB_CTRL2_BURST_CNT_SHIFT); val |= (0x9 << OOB_CTRL2_RESET_IDLE_MIN_SHIFT); - brcm_sata_phy_wr(base, OOB_REG_BANK, OOB_CTRL2, 0x0, val); + brcm_sata_phy_wr(port, OOB_REG_BANK, OOB_CTRL2, 0x0, val); /* Configure PHY PLL register bank 1 */ val = NS2_PLL1_ACTRL2_MAGIC; - brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL2, 0x0, val); + brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL2, 0x0, val); val = NS2_PLL1_ACTRL3_MAGIC; - brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL3, 0x0, val); + brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL3, 0x0, val); val = NS2_PLL1_ACTRL4_MAGIC; - brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL4, 0x0, val); + brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL4, 0x0, val); /* Configure PHY BLOCK0 register bank */ /* Set oob_clk_sel to refclk/2 */ - brcm_sata_phy_wr(base, BLOCK0_REG_BANK, BLOCK0_SPARE, + brcm_sata_phy_wr(port, BLOCK0_REG_BANK, BLOCK0_SPARE, ~BLOCK0_SPARE_OOB_CLK_SEL_MASK, BLOCK0_SPARE_OOB_CLK_SEL_REFBY2); @@ -451,7 +439,7 @@ static int brcm_ns2_sata_init(struct brcm_sata_port *port) /* Wait for PHY PLL lock by polling pll_lock bit */ try = 50; while (try) { - val = brcm_sata_phy_rd(base, BLOCK0_REG_BANK, + val = brcm_sata_phy_rd(port, BLOCK0_REG_BANK, BLOCK0_XGXSSTATUS); if (val & BLOCK0_XGXSSTATUS_PLL_LOCK) break; @@ -471,9 +459,7 @@ static int brcm_ns2_sata_init(struct brcm_sata_port *port) static int brcm_nsp_sata_init(struct brcm_sata_port *port) { - struct brcm_sata_phy *priv = port->phy_priv; struct device *dev = port->phy_priv->dev; - void __iomem *base = priv->phy_base; unsigned int oob_bank; unsigned int val, try; @@ -490,36 +476,36 @@ static int brcm_nsp_sata_init(struct brcm_sata_port *port) val |= (0x06 << OOB_CTRL1_BURST_MIN_SHIFT); val |= (0x0f << OOB_CTRL1_WAKE_IDLE_MAX_SHIFT); val |= (0x06 << OOB_CTRL1_WAKE_IDLE_MIN_SHIFT); - brcm_sata_phy_wr(base, oob_bank, OOB_CTRL1, 0x0, val); + brcm_sata_phy_wr(port, oob_bank, OOB_CTRL1, 0x0, val); val = 0x0; val |= (0x2e << OOB_CTRL2_RESET_IDLE_MAX_SHIFT); val |= (0x02 << OOB_CTRL2_BURST_CNT_SHIFT); val |= (0x16 << OOB_CTRL2_RESET_IDLE_MIN_SHIFT); - brcm_sata_phy_wr(base, oob_bank, OOB_CTRL2, 0x0, val); + brcm_sata_phy_wr(port, oob_bank, OOB_CTRL2, 0x0, val); - brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_ACTRL2, + brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_ACTRL2, ~(PLL_ACTRL2_SELDIV_MASK << PLL_ACTRL2_SELDIV_SHIFT), 0x0c << PLL_ACTRL2_SELDIV_SHIFT); - brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_CAP_CONTROL, + brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_CAP_CONTROL, 0xff0, 0x4f0); val = PLLCONTROL_0_FREQ_DET_RESTART | PLLCONTROL_0_FREQ_MONITOR; - brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0, + brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0, ~val, val); val = PLLCONTROL_0_SEQ_START; - brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0, + brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0, ~val, 0); mdelay(10); - brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0, + brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0, ~val, val); /* Wait for pll_seq_done bit */ try = 50; while (--try) { - val = brcm_sata_phy_rd(base, BLOCK0_REG_BANK, + val = brcm_sata_phy_rd(port, BLOCK0_REG_BANK, BLOCK0_XGXSSTATUS); if (val & BLOCK0_XGXSSTATUS_PLL_LOCK) break; @@ -546,27 +532,25 @@ static int brcm_nsp_sata_init(struct brcm_sata_port *port) static int brcm_sr_sata_init(struct brcm_sata_port *port) { - struct brcm_sata_phy *priv = port->phy_priv; struct device *dev = port->phy_priv->dev; - void __iomem *base = priv->phy_base; unsigned int val, try; /* Configure PHY PLL register bank 1 */ val = SR_PLL1_ACTRL2_MAGIC; - brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL2, 0x0, val); + brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL2, 0x0, val); val = SR_PLL1_ACTRL3_MAGIC; - brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL3, 0x0, val); + brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL3, 0x0, val); val = SR_PLL1_ACTRL4_MAGIC; - brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL4, 0x0, val); + brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL4, 0x0, val); /* Configure PHY PLL register bank 0 */ val = SR_PLL0_ACTRL6_MAGIC; - brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_ACTRL6, 0x0, val); + brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_ACTRL6, 0x0, val); /* Wait for PHY PLL lock by polling pll_lock bit */ try = 50; do { - val = brcm_sata_phy_rd(base, BLOCK0_REG_BANK, + val = brcm_sata_phy_rd(port, BLOCK0_REG_BANK, BLOCK0_XGXSSTATUS); if (val & BLOCK0_XGXSSTATUS_PLL_LOCK) break; @@ -581,7 +565,7 @@ static int brcm_sr_sata_init(struct brcm_sata_port *port) } /* Invert Tx polarity */ - brcm_sata_phy_wr(base, TX_REG_BANK, TX_ACTRL0, + brcm_sata_phy_wr(port, TX_REG_BANK, TX_ACTRL0, ~TX_ACTRL0_TXPOL_FLIP, TX_ACTRL0_TXPOL_FLIP); /* Configure OOB control to handle 100MHz reference clock */ @@ -589,52 +573,51 @@ static int brcm_sr_sata_init(struct brcm_sata_port *port) (0x4 << OOB_CTRL1_BURST_MIN_SHIFT) | (0x8 << OOB_CTRL1_WAKE_IDLE_MAX_SHIFT) | (0x3 << OOB_CTRL1_WAKE_IDLE_MIN_SHIFT)); - brcm_sata_phy_wr(base, OOB_REG_BANK, OOB_CTRL1, 0x0, val); + brcm_sata_phy_wr(port, OOB_REG_BANK, OOB_CTRL1, 0x0, val); val = ((0x1b << OOB_CTRL2_RESET_IDLE_MAX_SHIFT) | (0x2 << OOB_CTRL2_BURST_CNT_SHIFT) | (0x9 << OOB_CTRL2_RESET_IDLE_MIN_SHIFT)); - brcm_sata_phy_wr(base, OOB_REG_BANK, OOB_CTRL2, 0x0, val); + brcm_sata_phy_wr(port, OOB_REG_BANK, OOB_CTRL2, 0x0, val); return 0; } static int brcm_dsl_sata_init(struct brcm_sata_port *port) { - void __iomem *base = brcm_sata_pcb_base(port); struct device *dev = port->phy_priv->dev; unsigned int try; u32 tmp; - brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL7, 0, 0x873); + brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL7, 0, 0x873); - brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL6, 0, 0xc000); + brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL6, 0, 0xc000); - brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0, + brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0, 0, 0x3089); usleep_range(1000, 2000); - brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0, + brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0, 0, 0x3088); usleep_range(1000, 2000); - brcm_sata_phy_wr(base, AEQRX_REG_BANK_1, AEQRX_SLCAL0_CTRL0, + brcm_sata_phy_wr(port, AEQRX_REG_BANK_1, AEQRX_SLCAL0_CTRL0, 0, 0x3000); - brcm_sata_phy_wr(base, AEQRX_REG_BANK_1, AEQRX_SLCAL1_CTRL0, + brcm_sata_phy_wr(port, AEQRX_REG_BANK_1, AEQRX_SLCAL1_CTRL0, 0, 0x3000); usleep_range(1000, 2000); - brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_CAP_CHARGE_TIME, 0, 0x32); + brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_CAP_CHARGE_TIME, 0, 0x32); - brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_VCO_CAL_THRESH, 0, 0xa); + brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_VCO_CAL_THRESH, 0, 0xa); - brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_FREQ_DET_TIME, 0, 0x64); + brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_FREQ_DET_TIME, 0, 0x64); usleep_range(1000, 2000); /* Acquire PLL lock */ try = 50; while (try) { - tmp = brcm_sata_phy_rd(base, BLOCK0_REG_BANK, + tmp = brcm_sata_phy_rd(port, BLOCK0_REG_BANK, BLOCK0_XGXSSTATUS); if (tmp & BLOCK0_XGXSSTATUS_PLL_LOCK) break; @@ -687,10 +670,9 @@ static int brcm_sata_phy_init(struct phy *phy) static void brcm_stb_sata_calibrate(struct brcm_sata_port *port) { - void __iomem *base = brcm_sata_pcb_base(port); u32 tmp = BIT(8); - brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_FREQ_MON_CONTROL1, + brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_FREQ_MON_CONTROL1, ~tmp, tmp); } diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index f20524f0c21d..94a34cf75eb3 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -20,6 +20,7 @@ #define PHY_MDM6600_PHY_DELAY_MS 4000 /* PHY enable 2.2s to 3.5s */ #define PHY_MDM6600_ENABLED_DELAY_MS 8000 /* 8s more total for MDM6600 */ +#define PHY_MDM6600_WAKE_KICK_MS 600 /* time on after GPIO toggle */ #define MDM6600_MODEM_IDLE_DELAY_MS 1000 /* modem after USB suspend */ #define MDM6600_MODEM_WAKE_DELAY_MS 200 /* modem response after idle */ @@ -243,10 +244,24 @@ static irqreturn_t phy_mdm6600_wakeirq_thread(int irq, void *data) { struct phy_mdm6600 *ddata = data; struct gpio_desc *mode_gpio1; + int error, wakeup; mode_gpio1 = ddata->mode_gpios->desc[PHY_MDM6600_MODE1]; - dev_dbg(ddata->dev, "OOB wake on mode_gpio1: %i\n", - gpiod_get_value(mode_gpio1)); + wakeup = gpiod_get_value(mode_gpio1); + if (!wakeup) + return IRQ_NONE; + + dev_dbg(ddata->dev, "OOB wake on mode_gpio1: %i\n", wakeup); + error = pm_runtime_get_sync(ddata->dev); + if (error < 0) { + pm_runtime_put_noidle(ddata->dev); + + return IRQ_NONE; + } + + /* Just wake-up and kick the autosuspend timer */ + pm_runtime_mark_last_busy(ddata->dev); + pm_runtime_put_autosuspend(ddata->dev); return IRQ_HANDLED; } @@ -496,8 +511,14 @@ static void phy_mdm6600_modem_wake(struct work_struct *work) ddata = container_of(work, struct phy_mdm6600, modem_wake_work.work); phy_mdm6600_wake_modem(ddata); + + /* + * The modem does not always stay awake 1.2 seconds after toggling + * the wake GPIO, and sometimes it idles after about some 600 ms + * making writes time out. + */ schedule_delayed_work(&ddata->modem_wake_work, - msecs_to_jiffies(MDM6600_MODEM_IDLE_DELAY_MS)); + msecs_to_jiffies(PHY_MDM6600_WAKE_KICK_MS)); } static int __maybe_unused phy_mdm6600_runtime_suspend(struct device *dev) diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index cd5a6c95dbdc..a27b8d578d7f 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -688,11 +688,9 @@ struct phy *phy_get(struct device *dev, const char *string) get_device(&phy->dev); link = device_link_add(dev, &phy->dev, DL_FLAG_STATELESS); - if (!link) { - dev_err(dev, "failed to create device link to %s\n", + if (!link) + dev_dbg(dev, "failed to create device link to %s\n", dev_name(phy->dev.parent)); - return ERR_PTR(-EINVAL); - } return phy; } @@ -803,11 +801,9 @@ struct phy *devm_of_phy_get(struct device *dev, struct device_node *np, } link = device_link_add(dev, &phy->dev, DL_FLAG_STATELESS); - if (!link) { - dev_err(dev, "failed to create device link to %s\n", + if (!link) + dev_dbg(dev, "failed to create device link to %s\n", dev_name(phy->dev.parent)); - return ERR_PTR(-EINVAL); - } return phy; } @@ -852,11 +848,9 @@ struct phy *devm_of_phy_get_by_index(struct device *dev, struct device_node *np, devres_add(dev, ptr); link = device_link_add(dev, &phy->dev, DL_FLAG_STATELESS); - if (!link) { - dev_err(dev, "failed to create device link to %s\n", + if (!link) + dev_dbg(dev, "failed to create device link to %s\n", dev_name(phy->dev.parent)); - return ERR_PTR(-EINVAL); - } return phy; } diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index a28bd15297f5..1c536fc03c83 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -80,20 +80,20 @@ static int phy_gmii_sel_mode(struct phy *phy, enum phy_mode mode, int submode) break; case PHY_INTERFACE_MODE_MII: - mode = AM33XX_GMII_SEL_MODE_MII; + case PHY_INTERFACE_MODE_GMII: + gmii_sel_mode = AM33XX_GMII_SEL_MODE_MII; break; default: - dev_warn(dev, - "port%u: unsupported mode: \"%s\". Defaulting to MII.\n", - if_phy->id, phy_modes(rgmii_id)); + dev_warn(dev, "port%u: unsupported mode: \"%s\"\n", + if_phy->id, phy_modes(submode)); return -EINVAL; } if_phy->phy_if_mode = submode; dev_dbg(dev, "%s id:%u mode:%u rgmii_id:%d rmii_clk_ext:%d\n", - __func__, if_phy->id, mode, rgmii_id, + __func__, if_phy->id, submode, rgmii_id, if_phy->rmii_clock_external); regfield = if_phy->fields[PHY_GMII_SEL_PORT_MODE]; diff --git a/drivers/pinctrl/cirrus/pinctrl-madera-core.c b/drivers/pinctrl/cirrus/pinctrl-madera-core.c index 7b6409ef553c..dce2626384a9 100644 --- a/drivers/pinctrl/cirrus/pinctrl-madera-core.c +++ b/drivers/pinctrl/cirrus/pinctrl-madera-core.c @@ -1073,13 +1073,26 @@ static int madera_pin_probe(struct platform_device *pdev) return ret; } + platform_set_drvdata(pdev, priv); + dev_dbg(priv->dev, "pinctrl probed ok\n"); return 0; } +static int madera_pin_remove(struct platform_device *pdev) +{ + struct madera_pin_private *priv = platform_get_drvdata(pdev); + + if (priv->madera->pdata.gpio_configs) + pinctrl_unregister_mappings(priv->madera->pdata.gpio_configs); + + return 0; +} + static struct platform_driver madera_pin_driver = { .probe = madera_pin_probe, + .remove = madera_pin_remove, .driver = { .name = "madera-pinctrl", }, diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 446d84fe0e31..f23c55e22195 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2021,7 +2021,6 @@ static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev) return PTR_ERR(pctldev->p); } - kref_get(&pctldev->p->users); pctldev->hog_default = pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT); if (IS_ERR(pctldev->hog_default)) { diff --git a/drivers/pinctrl/freescale/pinctrl-scu.c b/drivers/pinctrl/freescale/pinctrl-scu.c index 73bf1d9f9cc6..23cf04bdfc55 100644 --- a/drivers/pinctrl/freescale/pinctrl-scu.c +++ b/drivers/pinctrl/freescale/pinctrl-scu.c @@ -23,12 +23,12 @@ struct imx_sc_msg_req_pad_set { struct imx_sc_rpc_msg hdr; u32 val; u16 pad; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_req_pad_get { struct imx_sc_rpc_msg hdr; u16 pad; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_resp_pad_get { struct imx_sc_rpc_msg hdr; diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c index 1b6e8646700f..2ac921c83da9 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c @@ -147,8 +147,8 @@ static const unsigned int sdio_d0_pins[] = { GPIOX_0 }; static const unsigned int sdio_d1_pins[] = { GPIOX_1 }; static const unsigned int sdio_d2_pins[] = { GPIOX_2 }; static const unsigned int sdio_d3_pins[] = { GPIOX_3 }; -static const unsigned int sdio_cmd_pins[] = { GPIOX_4 }; -static const unsigned int sdio_clk_pins[] = { GPIOX_5 }; +static const unsigned int sdio_clk_pins[] = { GPIOX_4 }; +static const unsigned int sdio_cmd_pins[] = { GPIOX_5 }; static const unsigned int sdio_irq_pins[] = { GPIOX_7 }; static const unsigned int nand_ce0_pins[] = { BOOT_8 }; diff --git a/drivers/pinctrl/pinctrl-falcon.c b/drivers/pinctrl/pinctrl-falcon.c index a454f57c264e..62c02b969327 100644 --- a/drivers/pinctrl/pinctrl-falcon.c +++ b/drivers/pinctrl/pinctrl-falcon.c @@ -451,7 +451,7 @@ static int pinctrl_falcon_probe(struct platform_device *pdev) falcon_info.clk[*bank] = clk_get(&ppdev->dev, NULL); if (IS_ERR(falcon_info.clk[*bank])) { dev_err(&ppdev->dev, "failed to get clock\n"); - of_node_put(np) + of_node_put(np); return PTR_ERR(falcon_info.clk[*bank]); } falcon_info.membase[*bank] = devm_ioremap_resource(&pdev->dev, diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 9a8daa256a32..1a948c3f54b7 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -1104,7 +1104,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) pctrl->irq_chip.irq_mask = msm_gpio_irq_mask; pctrl->irq_chip.irq_unmask = msm_gpio_irq_unmask; pctrl->irq_chip.irq_ack = msm_gpio_irq_ack; - pctrl->irq_chip.irq_eoi = irq_chip_eoi_parent; pctrl->irq_chip.irq_set_type = msm_gpio_irq_set_type; pctrl->irq_chip.irq_set_wake = msm_gpio_irq_set_wake; pctrl->irq_chip.irq_request_resources = msm_gpio_irq_reqres; @@ -1118,7 +1117,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) if (!chip->irq.parent_domain) return -EPROBE_DEFER; chip->irq.child_to_parent_hwirq = msm_gpio_wakeirq; - + pctrl->irq_chip.irq_eoi = irq_chip_eoi_parent; /* * Let's skip handling the GPIOs, if the parent irqchip * is handling the direct connect IRQ of the GPIO. diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index fba1d41d20ec..338a15d08629 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -794,7 +794,7 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev) girq->fwnode = of_node_to_fwnode(pctrl->dev->of_node); girq->parent_domain = parent_domain; girq->child_to_parent_hwirq = pm8xxx_child_to_parent_hwirq; - girq->populate_parent_alloc_arg = gpiochip_populate_parent_fwspec_fourcell; + girq->populate_parent_alloc_arg = gpiochip_populate_parent_fwspec_twocell; girq->child_offset_to_irq = pm8xxx_child_offset_to_irq; girq->child_irq_domain_ops.translate = pm8xxx_domain_translate; diff --git a/drivers/regulator/stm32-vrefbuf.c b/drivers/regulator/stm32-vrefbuf.c index bdfaf7edb75a..992bc18101ef 100644 --- a/drivers/regulator/stm32-vrefbuf.c +++ b/drivers/regulator/stm32-vrefbuf.c @@ -88,7 +88,7 @@ static int stm32_vrefbuf_disable(struct regulator_dev *rdev) } val = readl_relaxed(priv->base + STM32_VREFBUF_CSR); - val = (val & ~STM32_ENVR) | STM32_HIZ; + val &= ~STM32_ENVR; writel_relaxed(val, priv->base + STM32_VREFBUF_CSR); pm_runtime_mark_last_busy(priv->dev); @@ -175,6 +175,7 @@ static const struct regulator_desc stm32_vrefbuf_regu = { .volt_table = stm32_vrefbuf_voltages, .n_voltages = ARRAY_SIZE(stm32_vrefbuf_voltages), .ops = &stm32_vrefbuf_volt_ops, + .off_on_delay = 1000, .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, }; diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 56084635dd63..ffdb5bc25d6d 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -128,6 +128,7 @@ config QCOM_Q6V5_MSS select MFD_SYSCON select QCOM_MDT_LOADER select QCOM_Q6V5_COMMON + select QCOM_Q6V5_IPA_NOTIFY select QCOM_RPROC_COMMON select QCOM_SCM help @@ -169,9 +170,6 @@ config QCOM_Q6V5_WCSS config QCOM_Q6V5_IPA_NOTIFY tristate - depends on QCOM_IPA - depends on QCOM_Q6V5_MSS - default QCOM_IPA config QCOM_SYSMON tristate "Qualcomm sysmon driver" diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index 461b0e506a26..d9efbfd29646 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -51,6 +51,7 @@ config RESET_BRCMSTB config RESET_BRCMSTB_RESCAL bool "Broadcom STB RESCAL reset controller" + depends on HAS_IOMEM default ARCH_BRCMSTB || COMPILE_TEST help This enables the RESCAL reset controller for SATA, PCIe0, or PCIe1 on @@ -73,7 +74,7 @@ config RESET_IMX7 config RESET_INTEL_GW bool "Intel Reset Controller Driver" - depends on OF + depends on OF && HAS_IOMEM select REGMAP_MMIO help This enables the reset controller driver for Intel Gateway SoCs. diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index b7d64690ea38..6eb431c194bd 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -368,7 +368,7 @@ enum qeth_qdio_info_states { struct qeth_buffer_pool_entry { struct list_head list; struct list_head init_list; - void *elements[QDIO_MAX_ELEMENTS_PER_BUFFER]; + struct page *elements[QDIO_MAX_ELEMENTS_PER_BUFFER]; }; struct qeth_qdio_buffer_pool { @@ -847,11 +847,6 @@ struct qeth_trap_id { /*some helper functions*/ #define QETH_CARD_IFNAME(card) (((card)->dev)? (card)->dev->name : "") -static inline bool qeth_netdev_is_registered(struct net_device *dev) -{ - return dev->netdev_ops != NULL; -} - static inline u16 qeth_iqd_translate_txq(struct net_device *dev, u16 txq) { if (txq == QETH_IQD_MCAST_TXQ) @@ -985,7 +980,7 @@ extern const struct attribute_group qeth_device_blkt_group; extern const struct device_type qeth_generic_devtype; const char *qeth_get_cardname_short(struct qeth_card *); -int qeth_realloc_buffer_pool(struct qeth_card *, int); +int qeth_resize_buffer_pool(struct qeth_card *card, unsigned int count); int qeth_core_load_discipline(struct qeth_card *, enum qeth_discipline_id); void qeth_core_free_discipline(struct qeth_card *); @@ -1053,6 +1048,7 @@ int qeth_configure_cq(struct qeth_card *, enum qeth_cq); int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action); void qeth_trace_features(struct qeth_card *); int qeth_setassparms_cb(struct qeth_card *, struct qeth_reply *, unsigned long); +int qeth_setup_netdev(struct qeth_card *card); int qeth_set_features(struct net_device *, netdev_features_t); void qeth_enable_hw_features(struct net_device *dev); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); @@ -1060,6 +1056,7 @@ netdev_features_t qeth_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); +int qeth_set_real_num_tx_queues(struct qeth_card *card, unsigned int count); u16 qeth_iqd_select_queue(struct net_device *dev, struct sk_buff *skb, u8 cast_type, struct net_device *sb_dev); int qeth_open(struct net_device *dev); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 37c17ad8ee25..bd3adbb6ad50 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -65,7 +65,6 @@ static struct lock_class_key qdio_out_skb_queue_key; static void qeth_issue_next_read_cb(struct qeth_card *card, struct qeth_cmd_buffer *iob, unsigned int data_length); -static void qeth_free_buffer_pool(struct qeth_card *); static int qeth_qdio_establish(struct qeth_card *); static void qeth_free_qdio_queues(struct qeth_card *card); static void qeth_notify_skbs(struct qeth_qdio_out_q *queue, @@ -212,49 +211,121 @@ void qeth_clear_working_pool_list(struct qeth_card *card) } EXPORT_SYMBOL_GPL(qeth_clear_working_pool_list); +static void qeth_free_pool_entry(struct qeth_buffer_pool_entry *entry) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(entry->elements); i++) { + if (entry->elements[i]) + __free_page(entry->elements[i]); + } + + kfree(entry); +} + +static void qeth_free_buffer_pool(struct qeth_card *card) +{ + struct qeth_buffer_pool_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &card->qdio.init_pool.entry_list, + init_list) { + list_del(&entry->init_list); + qeth_free_pool_entry(entry); + } +} + +static struct qeth_buffer_pool_entry *qeth_alloc_pool_entry(unsigned int pages) +{ + struct qeth_buffer_pool_entry *entry; + unsigned int i; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + + for (i = 0; i < pages; i++) { + entry->elements[i] = __dev_alloc_page(GFP_KERNEL); + + if (!entry->elements[i]) { + qeth_free_pool_entry(entry); + return NULL; + } + } + + return entry; +} + static int qeth_alloc_buffer_pool(struct qeth_card *card) { - struct qeth_buffer_pool_entry *pool_entry; - void *ptr; - int i, j; + unsigned int buf_elements = QETH_MAX_BUFFER_ELEMENTS(card); + unsigned int i; QETH_CARD_TEXT(card, 5, "alocpool"); for (i = 0; i < card->qdio.init_pool.buf_count; ++i) { - pool_entry = kzalloc(sizeof(*pool_entry), GFP_KERNEL); - if (!pool_entry) { + struct qeth_buffer_pool_entry *entry; + + entry = qeth_alloc_pool_entry(buf_elements); + if (!entry) { qeth_free_buffer_pool(card); return -ENOMEM; } - for (j = 0; j < QETH_MAX_BUFFER_ELEMENTS(card); ++j) { - ptr = (void *) __get_free_page(GFP_KERNEL); - if (!ptr) { - while (j > 0) - free_page((unsigned long) - pool_entry->elements[--j]); - kfree(pool_entry); - qeth_free_buffer_pool(card); - return -ENOMEM; - } - pool_entry->elements[j] = ptr; - } - list_add(&pool_entry->init_list, - &card->qdio.init_pool.entry_list); + + list_add(&entry->init_list, &card->qdio.init_pool.entry_list); } return 0; } -int qeth_realloc_buffer_pool(struct qeth_card *card, int bufcnt) +int qeth_resize_buffer_pool(struct qeth_card *card, unsigned int count) { + unsigned int buf_elements = QETH_MAX_BUFFER_ELEMENTS(card); + struct qeth_qdio_buffer_pool *pool = &card->qdio.init_pool; + struct qeth_buffer_pool_entry *entry, *tmp; + int delta = count - pool->buf_count; + LIST_HEAD(entries); + QETH_CARD_TEXT(card, 2, "realcbp"); - /* TODO: steel/add buffers from/to a running card's buffer pool (?) */ - qeth_clear_working_pool_list(card); - qeth_free_buffer_pool(card); - card->qdio.in_buf_pool.buf_count = bufcnt; - card->qdio.init_pool.buf_count = bufcnt; - return qeth_alloc_buffer_pool(card); + /* Defer until queue is allocated: */ + if (!card->qdio.in_q) + goto out; + + /* Remove entries from the pool: */ + while (delta < 0) { + entry = list_first_entry(&pool->entry_list, + struct qeth_buffer_pool_entry, + init_list); + list_del(&entry->init_list); + qeth_free_pool_entry(entry); + + delta++; + } + + /* Allocate additional entries: */ + while (delta > 0) { + entry = qeth_alloc_pool_entry(buf_elements); + if (!entry) { + list_for_each_entry_safe(entry, tmp, &entries, + init_list) { + list_del(&entry->init_list); + qeth_free_pool_entry(entry); + } + + return -ENOMEM; + } + + list_add(&entry->init_list, &entries); + + delta--; + } + + list_splice(&entries, &pool->entry_list); + +out: + card->qdio.in_buf_pool.buf_count = count; + pool->buf_count = count; + return 0; } -EXPORT_SYMBOL_GPL(qeth_realloc_buffer_pool); +EXPORT_SYMBOL_GPL(qeth_resize_buffer_pool); static void qeth_free_qdio_queue(struct qeth_qdio_q *q) { @@ -467,9 +538,10 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, for (i = 0; i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card); i++) { - if (aob->sba[i] && buffer->is_header[i]) - kmem_cache_free(qeth_core_header_cache, - (void *) aob->sba[i]); + void *data = phys_to_virt(aob->sba[i]); + + if (data && buffer->is_header[i]) + kmem_cache_free(qeth_core_header_cache, data); } atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED); @@ -1171,24 +1243,14 @@ void qeth_drain_output_queues(struct qeth_card *card) } EXPORT_SYMBOL_GPL(qeth_drain_output_queues); -static void qeth_free_buffer_pool(struct qeth_card *card) -{ - struct qeth_buffer_pool_entry *pool_entry, *tmp; - int i = 0; - list_for_each_entry_safe(pool_entry, tmp, - &card->qdio.init_pool.entry_list, init_list){ - for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) - free_page((unsigned long)pool_entry->elements[i]); - list_del(&pool_entry->init_list); - kfree(pool_entry); - } -} - static int qeth_osa_set_output_queues(struct qeth_card *card, bool single) { - unsigned int count = single ? 1 : card->dev->num_tx_queues; + unsigned int max = single ? 1 : card->dev->num_tx_queues; + unsigned int count; int rc; + count = IS_VM_NIC(card) ? min(max, card->dev->real_num_tx_queues) : max; + rtnl_lock(); rc = netif_set_real_num_tx_queues(card->dev, count); rtnl_unlock(); @@ -1196,17 +1258,16 @@ static int qeth_osa_set_output_queues(struct qeth_card *card, bool single) if (rc) return rc; - if (card->qdio.no_out_queues == count) + if (card->qdio.no_out_queues == max) return 0; if (atomic_read(&card->qdio.state) != QETH_QDIO_UNINITIALIZED) qeth_free_qdio_queues(card); - if (count == 1) + if (max == 1 && card->qdio.do_prio_queueing != QETH_PRIOQ_DEFAULT) dev_info(&card->gdev->dev, "Priority Queueing not supported\n"); - card->qdio.default_out_queue = single ? 0 : QETH_DEFAULT_QUEUE; - card->qdio.no_out_queues = count; + card->qdio.no_out_queues = max; return 0; } @@ -2392,7 +2453,6 @@ static void qeth_free_qdio_queues(struct qeth_card *card) return; qeth_free_cq(card); - cancel_delayed_work_sync(&card->buffer_reclaim_work); for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { if (card->qdio.in_q->bufs[j].rx_skb) dev_kfree_skb_any(card->qdio.in_q->bufs[j].rx_skb); @@ -2574,7 +2634,6 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry( struct list_head *plh; struct qeth_buffer_pool_entry *entry; int i, free; - struct page *page; if (list_empty(&card->qdio.in_buf_pool.entry_list)) return NULL; @@ -2583,7 +2642,7 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry( entry = list_entry(plh, struct qeth_buffer_pool_entry, list); free = 1; for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) { - if (page_count(virt_to_page(entry->elements[i])) > 1) { + if (page_count(entry->elements[i]) > 1) { free = 0; break; } @@ -2598,15 +2657,15 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry( entry = list_entry(card->qdio.in_buf_pool.entry_list.next, struct qeth_buffer_pool_entry, list); for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) { - if (page_count(virt_to_page(entry->elements[i])) > 1) { - page = alloc_page(GFP_ATOMIC); - if (!page) { + if (page_count(entry->elements[i]) > 1) { + struct page *page = dev_alloc_page(); + + if (!page) return NULL; - } else { - free_page((unsigned long)entry->elements[i]); - entry->elements[i] = page_address(page); - QETH_CARD_STAT_INC(card, rx_sg_alloc_page); - } + + __free_page(entry->elements[i]); + entry->elements[i] = page; + QETH_CARD_STAT_INC(card, rx_sg_alloc_page); } } list_del_init(&entry->list); @@ -2624,12 +2683,12 @@ static int qeth_init_input_buffer(struct qeth_card *card, ETH_HLEN + sizeof(struct ipv6hdr)); if (!buf->rx_skb) - return 1; + return -ENOMEM; } pool_entry = qeth_find_free_buffer_pool_entry(card); if (!pool_entry) - return 1; + return -ENOBUFS; /* * since the buffer is accessed only from the input_tasklet @@ -2642,7 +2701,7 @@ static int qeth_init_input_buffer(struct qeth_card *card, for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) { buf->buffer->element[i].length = PAGE_SIZE; buf->buffer->element[i].addr = - virt_to_phys(pool_entry->elements[i]); + page_to_phys(pool_entry->elements[i]); if (i == QETH_MAX_BUFFER_ELEMENTS(card) - 1) buf->buffer->element[i].eflags = SBAL_EFLAGS_LAST_ENTRY; else @@ -2674,10 +2733,15 @@ static int qeth_init_qdio_queues(struct qeth_card *card) /* inbound queue */ qdio_reset_buffers(card->qdio.in_q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); memset(&card->rx, 0, sizeof(struct qeth_rx)); + qeth_initialize_working_pool_list(card); /*give only as many buffers to hardware as we have buffer pool entries*/ - for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; ++i) - qeth_init_input_buffer(card, &card->qdio.in_q->bufs[i]); + for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; i++) { + rc = qeth_init_input_buffer(card, &card->qdio.in_q->bufs[i]); + if (rc) + return rc; + } + card->qdio.in_q->next_buf_to_init = card->qdio.in_buf_pool.buf_count - 1; rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0, @@ -3292,6 +3356,7 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, for (i = index; i < index + count; ++i) { unsigned int bidx = QDIO_BUFNR(i); + struct sk_buff *skb; buf = queue->bufs[bidx]; buf->buffer->element[buf->next_element_to_fill - 1].eflags |= @@ -3300,8 +3365,11 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, if (queue->bufstates) queue->bufstates[bidx].user = buf; - if (IS_IQD(queue->card)) + if (IS_IQD(card)) { + skb_queue_walk(&buf->skb_list, skb) + skb_tx_timestamp(skb); continue; + } if (!queue->do_pack) { if ((atomic_read(&queue->used_buffers) >= @@ -3645,6 +3713,7 @@ static int qeth_add_hw_header(struct qeth_qdio_out_q *queue, unsigned int hdr_len, unsigned int proto_len, unsigned int *elements) { + gfp_t gfp = GFP_ATOMIC | (skb_pfmemalloc(skb) ? __GFP_MEMALLOC : 0); const unsigned int contiguous = proto_len ? proto_len : 1; const unsigned int max_elements = queue->max_elements; unsigned int __elements; @@ -3700,10 +3769,11 @@ check_layout: *hdr = skb_push(skb, hdr_len); return hdr_len; } - /* fall back */ + + /* Fall back to cache element with known-good alignment: */ if (hdr_len + proto_len > QETH_HDR_CACHE_OBJ_SIZE) return -E2BIG; - *hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC); + *hdr = kmem_cache_alloc(qeth_core_header_cache, gfp); if (!*hdr) return -ENOMEM; /* Copy protocol headers behind HW header: */ @@ -5925,22 +5995,8 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) SET_NETDEV_DEV(dev, &card->gdev->dev); netif_carrier_off(dev); - if (IS_OSN(card)) { - dev->ethtool_ops = &qeth_osn_ethtool_ops; - } else { - dev->ethtool_ops = &qeth_ethtool_ops; - dev->priv_flags &= ~IFF_TX_SKB_SHARING; - dev->hw_features |= NETIF_F_SG; - dev->vlan_features |= NETIF_F_SG; - if (IS_IQD(card)) { - dev->features |= NETIF_F_SG; - if (netif_set_real_num_tx_queues(dev, - QETH_IQD_MIN_TXQ)) { - free_netdev(dev); - return NULL; - } - } - } + dev->ethtool_ops = IS_OSN(card) ? &qeth_osn_ethtool_ops : + &qeth_ethtool_ops; return dev; } @@ -5956,6 +6012,28 @@ struct net_device *qeth_clone_netdev(struct net_device *orig) return clone; } +int qeth_setup_netdev(struct qeth_card *card) +{ + struct net_device *dev = card->dev; + unsigned int num_tx_queues; + + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->hw_features |= NETIF_F_SG; + dev->vlan_features |= NETIF_F_SG; + + if (IS_IQD(card)) { + dev->features |= NETIF_F_SG; + num_tx_queues = QETH_IQD_MIN_TXQ; + } else if (IS_VM_NIC(card)) { + num_tx_queues = 1; + } else { + num_tx_queues = dev->real_num_tx_queues; + } + + return qeth_set_real_num_tx_queues(card, num_tx_queues); +} +EXPORT_SYMBOL_GPL(qeth_setup_netdev); + static int qeth_core_probe_device(struct ccwgroup_device *gdev) { struct qeth_card *card; @@ -5995,12 +6073,13 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) goto err_card; } + qeth_determine_capabilities(card); + qeth_set_blkt_defaults(card); + card->qdio.no_out_queues = card->dev->num_tx_queues; rc = qeth_update_from_chp_desc(card); if (rc) goto err_chp_desc; - qeth_determine_capabilities(card); - qeth_set_blkt_defaults(card); enforced_disc = qeth_enforce_discipline(card); switch (enforced_disc) { @@ -6185,9 +6264,6 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) struct mii_ioctl_data *mii_data; int rc = 0; - if (!card) - return -ENODEV; - switch (cmd) { case SIOC_QETH_ADP_SET_SNMP_CONTROL: rc = qeth_snmp_command(card, rq->ifr_ifru.ifru_data); @@ -6567,12 +6643,59 @@ void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) } EXPORT_SYMBOL_GPL(qeth_get_stats64); +#define TC_IQD_UCAST 0 +static void qeth_iqd_set_prio_tc_map(struct net_device *dev, + unsigned int ucast_txqs) +{ + unsigned int prio; + + /* IQD requires mcast traffic to be placed on a dedicated queue, and + * qeth_iqd_select_queue() deals with this. + * For unicast traffic, we defer the queue selection to the stack. + * By installing a trivial prio map that spans over only the unicast + * queues, we can encourage the stack to spread the ucast traffic evenly + * without selecting the mcast queue. + */ + + /* One traffic class, spanning over all active ucast queues: */ + netdev_set_num_tc(dev, 1); + netdev_set_tc_queue(dev, TC_IQD_UCAST, ucast_txqs, + QETH_IQD_MIN_UCAST_TXQ); + + /* Map all priorities to this traffic class: */ + for (prio = 0; prio <= TC_BITMASK; prio++) + netdev_set_prio_tc_map(dev, prio, TC_IQD_UCAST); +} + +int qeth_set_real_num_tx_queues(struct qeth_card *card, unsigned int count) +{ + struct net_device *dev = card->dev; + int rc; + + /* Per netif_setup_tc(), adjust the mapping first: */ + if (IS_IQD(card)) + qeth_iqd_set_prio_tc_map(dev, count - 1); + + rc = netif_set_real_num_tx_queues(dev, count); + + if (rc && IS_IQD(card)) + qeth_iqd_set_prio_tc_map(dev, dev->real_num_tx_queues - 1); + + return rc; +} + u16 qeth_iqd_select_queue(struct net_device *dev, struct sk_buff *skb, u8 cast_type, struct net_device *sb_dev) { + u16 txq; + if (cast_type != RTN_UNICAST) return QETH_IQD_MCAST_TXQ; - return QETH_IQD_MIN_UCAST_TXQ; + if (dev->real_num_tx_queues == QETH_IQD_MIN_TXQ) + return QETH_IQD_MIN_UCAST_TXQ; + + txq = netdev_pick_tx(dev, skb, sb_dev); + return (txq == QETH_IQD_MCAST_TXQ) ? QETH_IQD_MIN_UCAST_TXQ : txq; } EXPORT_SYMBOL_GPL(qeth_iqd_select_queue); diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 2bd9993aa60b..533a7f26dbe1 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -176,7 +176,7 @@ static ssize_t qeth_dev_prioqing_store(struct device *dev, struct qeth_card *card = dev_get_drvdata(dev); int rc = 0; - if (IS_IQD(card)) + if (IS_IQD(card) || IS_VM_NIC(card)) return -EOPNOTSUPP; mutex_lock(&card->conf_mutex); @@ -247,8 +247,8 @@ static ssize_t qeth_dev_bufcnt_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); + unsigned int cnt; char *tmp; - int cnt, old_cnt; int rc = 0; mutex_lock(&card->conf_mutex); @@ -257,13 +257,12 @@ static ssize_t qeth_dev_bufcnt_store(struct device *dev, goto out; } - old_cnt = card->qdio.in_buf_pool.buf_count; cnt = simple_strtoul(buf, &tmp, 10); cnt = (cnt < QETH_IN_BUF_COUNT_MIN) ? QETH_IN_BUF_COUNT_MIN : ((cnt > QETH_IN_BUF_COUNT_MAX) ? QETH_IN_BUF_COUNT_MAX : cnt); - if (old_cnt != cnt) { - rc = qeth_realloc_buffer_pool(card, cnt); - } + + rc = qeth_resize_buffer_pool(card, cnt); + out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c index 9052c72d5b8f..31e019085fc3 100644 --- a/drivers/s390/net/qeth_ethtool.c +++ b/drivers/s390/net/qeth_ethtool.c @@ -153,7 +153,6 @@ static void qeth_get_drvinfo(struct net_device *dev, strlcpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3", sizeof(info->driver)); - strlcpy(info->version, "1.0", sizeof(info->version)); strlcpy(info->fw_version, card->info.mcl_level, sizeof(info->fw_version)); snprintf(info->bus_info, sizeof(info->bus_info), "%s/%s/%s", @@ -175,6 +174,46 @@ static void qeth_get_channels(struct net_device *dev, channels->combined_count = 0; } +static int qeth_set_channels(struct net_device *dev, + struct ethtool_channels *channels) +{ + struct qeth_card *card = dev->ml_priv; + + if (channels->rx_count == 0 || channels->tx_count == 0) + return -EINVAL; + if (channels->tx_count > card->qdio.no_out_queues) + return -EINVAL; + + if (IS_IQD(card)) { + if (channels->tx_count < QETH_IQD_MIN_TXQ) + return -EINVAL; + + /* Reject downgrade while running. It could push displaced + * ucast flows onto txq0, which is reserved for mcast. + */ + if (netif_running(dev) && + channels->tx_count < dev->real_num_tx_queues) + return -EPERM; + } else { + /* OSA still uses the legacy prio-queue mechanism: */ + if (!IS_VM_NIC(card)) + return -EOPNOTSUPP; + } + + return qeth_set_real_num_tx_queues(card, channels->tx_count); +} + +static int qeth_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct qeth_card *card = dev->ml_priv; + + if (!IS_IQD(card)) + return -EOPNOTSUPP; + + return ethtool_op_get_ts_info(dev, info); +} + static int qeth_get_tunable(struct net_device *dev, const struct ethtool_tunable *tuna, void *data) { @@ -410,6 +449,8 @@ const struct ethtool_ops qeth_ethtool_ops = { .get_sset_count = qeth_get_sset_count, .get_drvinfo = qeth_get_drvinfo, .get_channels = qeth_get_channels, + .set_channels = qeth_set_channels, + .get_ts_info = qeth_get_ts_info, .get_tunable = qeth_get_tunable, .set_tunable = qeth_set_tunable, .get_link_ksettings = qeth_get_link_ksettings, diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 0bf5e7133229..73cb363b1fab 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -284,6 +284,7 @@ static void qeth_l2_stop_card(struct qeth_card *card) if (card->state == CARD_STATE_SOFTSETUP) { qeth_clear_ipacmd_list(card); qeth_drain_output_queues(card); + cancel_delayed_work_sync(&card->buffer_reclaim_work); card->state = CARD_STATE_DOWN; } @@ -498,6 +499,7 @@ static void qeth_l2_rx_mode_work(struct work_struct *work) static int qeth_l2_xmit_osn(struct qeth_card *card, struct sk_buff *skb, struct qeth_qdio_out_q *queue) { + gfp_t gfp = GFP_ATOMIC | (skb_pfmemalloc(skb) ? __GFP_MEMALLOC : 0); struct qeth_hdr *hdr = (struct qeth_hdr *)skb->data; addr_t end = (addr_t)(skb->data + sizeof(*hdr)); addr_t start = (addr_t)skb->data; @@ -510,7 +512,7 @@ static int qeth_l2_xmit_osn(struct qeth_card *card, struct sk_buff *skb, if (qeth_get_elements_for_range(start, end) > 1) { /* Misaligned HW header, move it to its own buffer element. */ - hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC); + hdr = kmem_cache_alloc(qeth_core_header_cache, gfp); if (!hdr) return -ENOMEM; hd_len = sizeof(*hdr); @@ -569,7 +571,9 @@ static u16 qeth_l2_select_queue(struct net_device *dev, struct sk_buff *skb, return qeth_iqd_select_queue(dev, skb, qeth_get_ether_cast_type(skb), sb_dev); - return qeth_get_priority_queue(card, skb); + + return IS_VM_NIC(card) ? netdev_pick_tx(dev, skb, sb_dev) : + qeth_get_priority_queue(card, skb); } static const struct device_type qeth_l2_devtype = { @@ -609,7 +613,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) qeth_set_offline(card, false); cancel_work_sync(&card->close_dev_work); - if (qeth_netdev_is_registered(card->dev)) + if (card->dev->reg_state == NETREG_REGISTERED) unregister_netdev(card->dev); } @@ -647,7 +651,7 @@ static const struct net_device_ops qeth_osn_netdev_ops = { .ndo_tx_timeout = qeth_tx_timeout, }; -static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) +static int qeth_l2_setup_netdev(struct qeth_card *card) { int rc; @@ -657,6 +661,10 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) goto add_napi; } + rc = qeth_setup_netdev(card); + if (rc) + return rc; + card->dev->needed_headroom = sizeof(struct qeth_hdr); card->dev->netdev_ops = &qeth_l2_netdev_ops; card->dev->priv_flags |= IFF_UNICAST_FLT; @@ -703,13 +711,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) add_napi: netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); - rc = register_netdev(card->dev); - if (!rc && carrier_ok) - netif_carrier_on(card->dev); - - if (rc) - card->dev->netdev_ops = NULL; - return rc; + return register_netdev(card->dev); } static void qeth_l2_trace_features(struct qeth_card *card) @@ -782,10 +784,13 @@ static int qeth_l2_set_online(struct qeth_card *card) qeth_set_allowed_threads(card, 0xffffffff, 0); - if (!qeth_netdev_is_registered(dev)) { - rc = qeth_l2_setup_netdev(card, carrier_ok); + if (dev->reg_state != NETREG_REGISTERED) { + rc = qeth_l2_setup_netdev(card); if (rc) goto out_remove; + + if (carrier_ok) + netif_carrier_on(dev); } else { rtnl_lock(); if (carrier_ok) @@ -1511,8 +1516,6 @@ int qeth_bridgeport_an_set(struct qeth_card *card, int enable) struct ccw_device *ddev; struct subchannel_id schid; - if (!card) - return -EINVAL; if (!card->options.sbp.supported_funcs) return -EOPNOTSUPP; ddev = CARD_DDEV(card); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 1000e18c1090..83ae75cf1389 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1172,6 +1172,7 @@ static void qeth_l3_stop_card(struct qeth_card *card) qeth_l3_clear_ip_htable(card, 1); qeth_clear_ipacmd_list(card); qeth_drain_output_queues(card); + cancel_delayed_work_sync(&card->buffer_reclaim_work); card->state = CARD_STATE_DOWN; } @@ -1879,7 +1880,8 @@ static u16 qeth_l3_osa_select_queue(struct net_device *dev, struct sk_buff *skb, { struct qeth_card *card = dev->ml_priv; - return qeth_get_priority_queue(card, skb); + return IS_VM_NIC(card) ? netdev_pick_tx(dev, skb, sb_dev) : + qeth_get_priority_queue(card, skb); } static const struct net_device_ops qeth_l3_netdev_ops = { @@ -1916,11 +1918,15 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = { .ndo_neigh_setup = qeth_l3_neigh_setup, }; -static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok) +static int qeth_l3_setup_netdev(struct qeth_card *card) { unsigned int headroom; int rc; + rc = qeth_setup_netdev(card); + if (rc) + return rc; + if (IS_OSD(card) || IS_OSX(card)) { if ((card->info.link_type == QETH_LINK_TYPE_LANE_TR) || (card->info.link_type == QETH_LINK_TYPE_HSTR)) { @@ -1966,7 +1972,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok) rc = qeth_l3_iqd_read_initial_mac(card); if (rc) - goto out; + return rc; } else return -ENODEV; @@ -1981,14 +1987,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok) PAGE_SIZE * (QETH_MAX_BUFFER_ELEMENTS(card) - 1)); netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); - rc = register_netdev(card->dev); - if (!rc && carrier_ok) - netif_carrier_on(card->dev); - -out: - if (rc) - card->dev->netdev_ops = NULL; - return rc; + return register_netdev(card->dev); } static const struct device_type qeth_l3_devtype = { @@ -2035,7 +2034,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_set_offline(card, false); cancel_work_sync(&card->close_dev_work); - if (qeth_netdev_is_registered(card->dev)) + if (card->dev->reg_state == NETREG_REGISTERED) unregister_netdev(card->dev); flush_workqueue(card->cmd_wq); @@ -2082,10 +2081,13 @@ static int qeth_l3_set_online(struct qeth_card *card) qeth_set_allowed_threads(card, 0xffffffff, 0); qeth_l3_recover_ip(card); - if (!qeth_netdev_is_registered(dev)) { - rc = qeth_l3_setup_netdev(card, carrier_ok); + if (dev->reg_state != NETREG_REGISTERED) { + rc = qeth_l3_setup_netdev(card); if (rc) goto out_remove; + + if (carrier_ok) + netif_carrier_on(dev); } else { rtnl_lock(); if (carrier_ok) diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index 29f2517d2a31..a3d1c3bdfadb 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -206,12 +206,11 @@ static ssize_t qeth_l3_dev_sniffer_store(struct device *dev, qdio_get_ssqd_desc(CARD_DDEV(card), &card->ssqd); if (card->ssqd.qdioac2 & CHSC_AC2_SNIFFER_AVAILABLE) { card->options.sniffer = i; - if (card->qdio.init_pool.buf_count != - QETH_IN_BUF_COUNT_MAX) - qeth_realloc_buffer_pool(card, - QETH_IN_BUF_COUNT_MAX); - } else + qeth_resize_buffer_pool(card, QETH_IN_BUF_COUNT_MAX); + } else { rc = -EPERM; + } + break; default: rc = -EINVAL; diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h index 2b1e4da1944f..4bfb79f20588 100644 --- a/drivers/s390/scsi/zfcp_fsf.h +++ b/drivers/s390/scsi/zfcp_fsf.h @@ -410,7 +410,7 @@ struct fsf_qtcb_bottom_port { u8 cb_util; u8 a_util; u8 res2; - u16 temperature; + s16 temperature; u16 vcc; u16 tx_bias; u16 tx_power; diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 494b9fe9cc94..a711a0d15100 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -800,7 +800,7 @@ static ZFCP_DEV_ATTR(adapter_diag, b2b_credit, 0400, static ZFCP_DEV_ATTR(adapter_diag_sfp, _name, 0400, \ zfcp_sysfs_adapter_diag_sfp_##_name##_show, NULL) -ZFCP_DEFINE_DIAG_SFP_ATTR(temperature, temperature, 5, "%hu"); +ZFCP_DEFINE_DIAG_SFP_ATTR(temperature, temperature, 6, "%hd"); ZFCP_DEFINE_DIAG_SFP_ATTR(vcc, vcc, 5, "%hu"); ZFCP_DEFINE_DIAG_SFP_ATTR(tx_bias, tx_bias, 5, "%hu"); ZFCP_DEFINE_DIAG_SFP_ATTR(tx_power, tx_power, 5, "%hu"); diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index 9c5f7c9178c6..2b865c6423e2 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -628,6 +628,8 @@ redisc: } out: kref_put(&rdata->kref, fc_rport_destroy); + if (!IS_ERR(fp)) + fc_frame_free(fp); } /** diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index e4282bce5834..f45c22b09726 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -161,6 +161,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data) { struct scsi_disk *sdkp = scsi_disk(disk); + sector_t capacity = logical_to_sectors(sdkp->device, sdkp->capacity); unsigned int nr, i; unsigned char *buf; size_t offset, buflen = 0; @@ -171,11 +172,15 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, /* Not a zoned device */ return -EOPNOTSUPP; + if (!capacity) + /* Device gone or invalid */ + return -ENODEV; + buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen); if (!buf) return -ENOMEM; - while (zone_idx < nr_zones && sector < get_capacity(disk)) { + while (zone_idx < nr_zones && sector < capacity) { ret = sd_zbc_do_report_zones(sdkp, buf, buflen, sectors_to_logical(sdkp->device, sector), true); if (ret) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 0fbb8fe6e521..e4240e4ae8bb 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -688,7 +688,7 @@ static const struct block_device_operations sr_bdops = .release = sr_block_release, .ioctl = sr_block_ioctl, #ifdef CONFIG_COMPAT - .ioctl = sr_block_compat_ioctl, + .compat_ioctl = sr_block_compat_ioctl, #endif .check_events = sr_block_check_events, .revalidate_disk = sr_block_revalidate_disk, diff --git a/drivers/soc/imx/soc-imx-scu.c b/drivers/soc/imx/soc-imx-scu.c index fb70b8a3f7c5..20d37eaeb5f2 100644 --- a/drivers/soc/imx/soc-imx-scu.c +++ b/drivers/soc/imx/soc-imx-scu.c @@ -25,7 +25,7 @@ struct imx_sc_msg_misc_get_soc_id { u32 id; } resp; } data; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_misc_get_soc_uid { struct imx_sc_rpc_msg hdr; diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index fd8007ebb145..13def7f78b9e 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -149,6 +149,7 @@ struct atmel_qspi { struct clk *qspick; struct platform_device *pdev; const struct atmel_qspi_caps *caps; + resource_size_t mmap_size; u32 pending; u32 mr; u32 scr; @@ -329,6 +330,14 @@ static int atmel_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op) u32 sr, offset; int err; + /* + * Check if the address exceeds the MMIO window size. An improvement + * would be to add support for regular SPI mode and fall back to it + * when the flash memories overrun the controller's memory space. + */ + if (op->addr.val + op->data.nbytes > aq->mmap_size) + return -ENOTSUPP; + err = atmel_qspi_set_cfg(aq, op, &offset); if (err) return err; @@ -480,6 +489,8 @@ static int atmel_qspi_probe(struct platform_device *pdev) goto exit; } + aq->mmap_size = resource_size(res); + /* Get the peripheral clock */ aq->pclk = devm_clk_get(&pdev->dev, "pclk"); if (IS_ERR(aq->pclk)) diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c index 7327309ea3d5..6c235306c0e4 100644 --- a/drivers/spi/spi-bcm63xx-hsspi.c +++ b/drivers/spi/spi-bcm63xx-hsspi.c @@ -366,7 +366,6 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev) goto out_disable_clk; rate = clk_get_rate(pll_clk); - clk_disable_unprepare(pll_clk); if (!rate) { ret = -EINVAL; goto out_disable_pll_clk; diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 7e2292c11d12..e9e256718ef4 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -130,6 +130,7 @@ struct omap2_mcspi { int fifo_depth; bool slave_aborted; unsigned int pin_dir:1; + size_t max_xfer_len; }; struct omap2_mcspi_cs { @@ -974,20 +975,12 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi, * Note that we currently allow DMA only if we get a channel * for both rx and tx. Otherwise we'll do PIO for both rx and tx. */ -static int omap2_mcspi_request_dma(struct spi_device *spi) +static int omap2_mcspi_request_dma(struct omap2_mcspi *mcspi, + struct omap2_mcspi_dma *mcspi_dma) { - struct spi_master *master = spi->master; - struct omap2_mcspi *mcspi; - struct omap2_mcspi_dma *mcspi_dma; int ret = 0; - mcspi = spi_master_get_devdata(master); - mcspi_dma = mcspi->dma_channels + spi->chip_select; - - init_completion(&mcspi_dma->dma_rx_completion); - init_completion(&mcspi_dma->dma_tx_completion); - - mcspi_dma->dma_rx = dma_request_chan(&master->dev, + mcspi_dma->dma_rx = dma_request_chan(mcspi->dev, mcspi_dma->dma_rx_ch_name); if (IS_ERR(mcspi_dma->dma_rx)) { ret = PTR_ERR(mcspi_dma->dma_rx); @@ -995,7 +988,7 @@ static int omap2_mcspi_request_dma(struct spi_device *spi) goto no_dma; } - mcspi_dma->dma_tx = dma_request_chan(&master->dev, + mcspi_dma->dma_tx = dma_request_chan(mcspi->dev, mcspi_dma->dma_tx_ch_name); if (IS_ERR(mcspi_dma->dma_tx)) { ret = PTR_ERR(mcspi_dma->dma_tx); @@ -1004,20 +997,40 @@ static int omap2_mcspi_request_dma(struct spi_device *spi) mcspi_dma->dma_rx = NULL; } + init_completion(&mcspi_dma->dma_rx_completion); + init_completion(&mcspi_dma->dma_tx_completion); + no_dma: return ret; } +static void omap2_mcspi_release_dma(struct spi_master *master) +{ + struct omap2_mcspi *mcspi = spi_master_get_devdata(master); + struct omap2_mcspi_dma *mcspi_dma; + int i; + + for (i = 0; i < master->num_chipselect; i++) { + mcspi_dma = &mcspi->dma_channels[i]; + + if (mcspi_dma->dma_rx) { + dma_release_channel(mcspi_dma->dma_rx); + mcspi_dma->dma_rx = NULL; + } + if (mcspi_dma->dma_tx) { + dma_release_channel(mcspi_dma->dma_tx); + mcspi_dma->dma_tx = NULL; + } + } +} + static int omap2_mcspi_setup(struct spi_device *spi) { int ret; struct omap2_mcspi *mcspi = spi_master_get_devdata(spi->master); struct omap2_mcspi_regs *ctx = &mcspi->ctx; - struct omap2_mcspi_dma *mcspi_dma; struct omap2_mcspi_cs *cs = spi->controller_state; - mcspi_dma = &mcspi->dma_channels[spi->chip_select]; - if (!cs) { cs = kzalloc(sizeof *cs, GFP_KERNEL); if (!cs) @@ -1042,13 +1055,6 @@ static int omap2_mcspi_setup(struct spi_device *spi) } } - if (!mcspi_dma->dma_rx || !mcspi_dma->dma_tx) { - ret = omap2_mcspi_request_dma(spi); - if (ret) - dev_warn(&spi->dev, "not using DMA for McSPI (%d)\n", - ret); - } - ret = pm_runtime_get_sync(mcspi->dev); if (ret < 0) { pm_runtime_put_noidle(mcspi->dev); @@ -1065,12 +1071,8 @@ static int omap2_mcspi_setup(struct spi_device *spi) static void omap2_mcspi_cleanup(struct spi_device *spi) { - struct omap2_mcspi *mcspi; - struct omap2_mcspi_dma *mcspi_dma; struct omap2_mcspi_cs *cs; - mcspi = spi_master_get_devdata(spi->master); - if (spi->controller_state) { /* Unlink controller state from context save list */ cs = spi->controller_state; @@ -1079,19 +1081,6 @@ static void omap2_mcspi_cleanup(struct spi_device *spi) kfree(cs); } - if (spi->chip_select < spi->master->num_chipselect) { - mcspi_dma = &mcspi->dma_channels[spi->chip_select]; - - if (mcspi_dma->dma_rx) { - dma_release_channel(mcspi_dma->dma_rx); - mcspi_dma->dma_rx = NULL; - } - if (mcspi_dma->dma_tx) { - dma_release_channel(mcspi_dma->dma_tx); - mcspi_dma->dma_tx = NULL; - } - } - if (gpio_is_valid(spi->cs_gpio)) gpio_free(spi->cs_gpio); } @@ -1302,9 +1291,24 @@ static bool omap2_mcspi_can_dma(struct spi_master *master, if (spi_controller_is_slave(master)) return true; + master->dma_rx = mcspi_dma->dma_rx; + master->dma_tx = mcspi_dma->dma_tx; + return (xfer->len >= DMA_MIN_BYTES); } +static size_t omap2_mcspi_max_xfer_size(struct spi_device *spi) +{ + struct omap2_mcspi *mcspi = spi_master_get_devdata(spi->master); + struct omap2_mcspi_dma *mcspi_dma = + &mcspi->dma_channels[spi->chip_select]; + + if (mcspi->max_xfer_len && mcspi_dma->dma_rx) + return mcspi->max_xfer_len; + + return SIZE_MAX; +} + static int omap2_mcspi_controller_setup(struct omap2_mcspi *mcspi) { struct spi_master *master = mcspi->master; @@ -1373,6 +1377,11 @@ static struct omap2_mcspi_platform_config omap4_pdata = { .regs_offset = OMAP4_MCSPI_REG_OFFSET, }; +static struct omap2_mcspi_platform_config am654_pdata = { + .regs_offset = OMAP4_MCSPI_REG_OFFSET, + .max_xfer_len = SZ_4K - 1, +}; + static const struct of_device_id omap_mcspi_of_match[] = { { .compatible = "ti,omap2-mcspi", @@ -1382,6 +1391,10 @@ static const struct of_device_id omap_mcspi_of_match[] = { .compatible = "ti,omap4-mcspi", .data = &omap4_pdata, }, + { + .compatible = "ti,am654-mcspi", + .data = &am654_pdata, + }, { }, }; MODULE_DEVICE_TABLE(of, omap_mcspi_of_match); @@ -1439,6 +1452,10 @@ static int omap2_mcspi_probe(struct platform_device *pdev) mcspi->pin_dir = pdata->pin_dir; } regs_offset = pdata->regs_offset; + if (pdata->max_xfer_len) { + mcspi->max_xfer_len = pdata->max_xfer_len; + master->max_transfer_size = omap2_mcspi_max_xfer_size; + } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); mcspi->base = devm_ioremap_resource(&pdev->dev, r); @@ -1464,6 +1481,11 @@ static int omap2_mcspi_probe(struct platform_device *pdev) for (i = 0; i < master->num_chipselect; i++) { sprintf(mcspi->dma_channels[i].dma_rx_ch_name, "rx%d", i); sprintf(mcspi->dma_channels[i].dma_tx_ch_name, "tx%d", i); + + status = omap2_mcspi_request_dma(mcspi, + &mcspi->dma_channels[i]); + if (status == -EPROBE_DEFER) + goto free_master; } status = platform_get_irq(pdev, 0); @@ -1501,6 +1523,7 @@ disable_pm: pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); free_master: + omap2_mcspi_release_dma(master); spi_master_put(master); return status; } @@ -1510,6 +1533,8 @@ static int omap2_mcspi_remove(struct platform_device *pdev) struct spi_master *master = platform_get_drvdata(pdev); struct omap2_mcspi *mcspi = spi_master_get_devdata(master); + omap2_mcspi_release_dma(master); + pm_runtime_dont_use_autosuspend(mcspi->dev); pm_runtime_put_sync(mcspi->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 4c7a71f0fb3e..2e318158fca9 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -70,6 +70,10 @@ MODULE_ALIAS("platform:pxa2xx-spi"); #define LPSS_CAPS_CS_EN_SHIFT 9 #define LPSS_CAPS_CS_EN_MASK (0xf << LPSS_CAPS_CS_EN_SHIFT) +#define LPSS_PRIV_CLOCK_GATE 0x38 +#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK 0x3 +#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON 0x3 + struct lpss_config { /* LPSS offset from drv_data->ioaddr */ unsigned offset; @@ -86,6 +90,8 @@ struct lpss_config { unsigned cs_sel_shift; unsigned cs_sel_mask; unsigned cs_num; + /* Quirks */ + unsigned cs_clk_stays_gated : 1; }; /* Keep these sorted with enum pxa_ssp_type */ @@ -156,6 +162,7 @@ static const struct lpss_config lpss_platforms[] = { .tx_threshold_hi = 56, .cs_sel_shift = 8, .cs_sel_mask = 3 << 8, + .cs_clk_stays_gated = true, }, }; @@ -383,6 +390,22 @@ static void lpss_ssp_cs_control(struct spi_device *spi, bool enable) else value |= LPSS_CS_CONTROL_CS_HIGH; __lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value); + if (config->cs_clk_stays_gated) { + u32 clkgate; + + /* + * Changing CS alone when dynamic clock gating is on won't + * actually flip CS at that time. This ruins SPI transfers + * that specify delays, or have no data. Toggle the clock mode + * to force on briefly to poke the CS pin to move. + */ + clkgate = __lpss_ssp_read_priv(drv_data, LPSS_PRIV_CLOCK_GATE); + value = (clkgate & ~LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK) | + LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON; + + __lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, value); + __lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, clkgate); + } } static void cs_assert(struct spi_device *spi) diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index dd3434a407ea..a364b99497e2 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -1217,6 +1217,11 @@ static int spi_qup_suspend(struct device *device) struct spi_qup *controller = spi_master_get_devdata(master); int ret; + if (pm_runtime_suspended(device)) { + ret = spi_qup_pm_resume_runtime(device); + if (ret) + return ret; + } ret = spi_master_suspend(master); if (ret) return ret; @@ -1225,10 +1230,8 @@ static int spi_qup_suspend(struct device *device) if (ret) return ret; - if (!pm_runtime_suspended(device)) { - clk_disable_unprepare(controller->cclk); - clk_disable_unprepare(controller->iclk); - } + clk_disable_unprepare(controller->cclk); + clk_disable_unprepare(controller->iclk); return 0; } diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index 60c4de4e4485..7412a3042a8d 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -401,9 +401,6 @@ static void zynqmp_qspi_chipselect(struct spi_device *qspi, bool is_high) zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, genfifoentry); - /* Dummy generic FIFO entry */ - zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, 0x0); - /* Manually start the generic FIFO command */ zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) | diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 38b4c78df506..755221bc3745 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2639,7 +2639,7 @@ int spi_register_controller(struct spi_controller *ctlr) if (ctlr->use_gpio_descriptors) { status = spi_get_gpio_descs(ctlr); if (status) - return status; + goto free_bus_id; /* * A controller using GPIO descriptors always * supports SPI_CS_HIGH if need be. @@ -2649,7 +2649,7 @@ int spi_register_controller(struct spi_controller *ctlr) /* Legacy code path for GPIOs from DT */ status = of_spi_get_gpio_numbers(ctlr); if (status) - return status; + goto free_bus_id; } } @@ -2657,17 +2657,14 @@ int spi_register_controller(struct spi_controller *ctlr) * Even if it's just one always-selected device, there must * be at least one chipselect. */ - if (!ctlr->num_chipselect) - return -EINVAL; + if (!ctlr->num_chipselect) { + status = -EINVAL; + goto free_bus_id; + } status = device_add(&ctlr->dev); - if (status < 0) { - /* free bus id */ - mutex_lock(&board_lock); - idr_remove(&spi_master_idr, ctlr->bus_num); - mutex_unlock(&board_lock); - goto done; - } + if (status < 0) + goto free_bus_id; dev_dbg(dev, "registered %s %s\n", spi_controller_is_slave(ctlr) ? "slave" : "master", dev_name(&ctlr->dev)); @@ -2683,11 +2680,7 @@ int spi_register_controller(struct spi_controller *ctlr) status = spi_controller_initialize_queue(ctlr); if (status) { device_del(&ctlr->dev); - /* free bus id */ - mutex_lock(&board_lock); - idr_remove(&spi_master_idr, ctlr->bus_num); - mutex_unlock(&board_lock); - goto done; + goto free_bus_id; } } /* add statistics */ @@ -2702,7 +2695,12 @@ int spi_register_controller(struct spi_controller *ctlr) /* Register devices from the device tree and ACPI */ of_register_spi_devices(ctlr); acpi_register_spi_devices(ctlr); -done: + return status; + +free_bus_id: + mutex_lock(&board_lock); + idr_remove(&spi_master_idr, ctlr->bus_num); + mutex_unlock(&board_lock); return status; } EXPORT_SYMBOL_GPL(spi_register_controller); diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 1e217e3e9486..2ab6e782f14c 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -396,6 +396,7 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) else retval = get_user(tmp, (u32 __user *)arg); if (retval == 0) { + struct spi_controller *ctlr = spi->controller; u32 save = spi->mode; if (tmp & ~SPI_MODE_MASK) { @@ -403,6 +404,10 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) break; } + if (ctlr->use_gpio_descriptors && ctlr->cs_gpiods && + ctlr->cs_gpiods[spi->chip_select]) + tmp |= SPI_CS_HIGH; + tmp |= spi->mode & ~SPI_MODE_MASK; spi->mode = (u16)tmp; retval = spi_setup(spi); diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c index 97c615a2f057..c98835326135 100644 --- a/drivers/staging/media/hantro/hantro_drv.c +++ b/drivers/staging/media/hantro/hantro_drv.c @@ -558,13 +558,13 @@ static int hantro_attach_func(struct hantro_dev *vpu, goto err_rel_entity1; /* Connect the three entities */ - ret = media_create_pad_link(&func->vdev.entity, 0, &func->proc, 1, + ret = media_create_pad_link(&func->vdev.entity, 0, &func->proc, 0, MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED); if (ret) goto err_rel_entity2; - ret = media_create_pad_link(&func->proc, 0, &func->sink, 0, + ret = media_create_pad_link(&func->proc, 1, &func->sink, 0, MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED); if (ret) diff --git a/drivers/staging/speakup/selection.c b/drivers/staging/speakup/selection.c index a8b4d0c5ab7e..032f3264fba1 100644 --- a/drivers/staging/speakup/selection.c +++ b/drivers/staging/speakup/selection.c @@ -51,9 +51,7 @@ static void __speakup_set_selection(struct work_struct *work) goto unref; } - console_lock(); set_selection_kernel(&sel, tty); - console_unlock(); unref: tty_kref_put(tty); diff --git a/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/siliabs,wfx.txt b/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/siliabs,wfx.txt index 26de6762b942..081d58abd5ac 100644 --- a/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/siliabs,wfx.txt +++ b/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/siliabs,wfx.txt @@ -93,5 +93,5 @@ Some properties are recognized either by SPI and SDIO versions: Must contains 64 hexadecimal digits. Not supported in current version. WFx driver also supports `mac-address` and `local-mac-address` as described in -Documentation/devicetree/binding/net/ethernet.txt +Documentation/devicetree/bindings/net/ethernet.txt diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index 6370bb55f512..0026eb6f13ce 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -212,6 +212,19 @@ unlock: return rc; } +static void destroy_session(struct kref *ref) +{ + struct amdtee_session *sess = container_of(ref, struct amdtee_session, + refcount); + + /* Unload the TA from TEE */ + handle_unload_ta(sess->ta_handle); + mutex_lock(&session_list_mutex); + list_del(&sess->list_node); + mutex_unlock(&session_list_mutex); + kfree(sess); +} + int amdtee_open_session(struct tee_context *ctx, struct tee_ioctl_open_session_arg *arg, struct tee_param *param) @@ -236,15 +249,13 @@ int amdtee_open_session(struct tee_context *ctx, /* Load the TA binary into TEE environment */ handle_load_ta(ta, ta_size, arg); - if (arg->ret == TEEC_SUCCESS) { - mutex_lock(&session_list_mutex); - sess = alloc_session(ctxdata, arg->session); - mutex_unlock(&session_list_mutex); - } - if (arg->ret != TEEC_SUCCESS) goto out; + mutex_lock(&session_list_mutex); + sess = alloc_session(ctxdata, arg->session); + mutex_unlock(&session_list_mutex); + if (!sess) { rc = -ENOMEM; goto out; @@ -259,40 +270,29 @@ int amdtee_open_session(struct tee_context *ctx, if (i >= TEE_NUM_SESSIONS) { pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS); + kref_put(&sess->refcount, destroy_session); rc = -ENOMEM; goto out; } /* Open session with loaded TA */ handle_open_session(arg, &session_info, param); - - if (arg->ret == TEEC_SUCCESS) { - sess->session_info[i] = session_info; - set_session_id(sess->ta_handle, i, &arg->session); - } else { + if (arg->ret != TEEC_SUCCESS) { pr_err("open_session failed %d\n", arg->ret); spin_lock(&sess->lock); clear_bit(i, sess->sess_mask); spin_unlock(&sess->lock); + kref_put(&sess->refcount, destroy_session); + goto out; } + + sess->session_info[i] = session_info; + set_session_id(sess->ta_handle, i, &arg->session); out: free_pages((u64)ta, get_order(ta_size)); return rc; } -static void destroy_session(struct kref *ref) -{ - struct amdtee_session *sess = container_of(ref, struct amdtee_session, - refcount); - - /* Unload the TA from TEE */ - handle_unload_ta(sess->ta_handle); - mutex_lock(&session_list_mutex); - list_del(&sess->list_node); - mutex_unlock(&session_list_mutex); - kfree(sess); -} - int amdtee_close_session(struct tee_context *ctx, u32 session) { struct amdtee_context_data *ctxdata = ctx->data; diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index 42345e79920c..c5f0d936b003 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -18,6 +18,7 @@ #include <linux/sched.h> #include <linux/serdev.h> #include <linux/slab.h> +#include <linux/platform_data/x86/apple.h> static bool is_registered; static DEFINE_IDA(ctrl_ida); @@ -631,6 +632,15 @@ static int acpi_serdev_check_resources(struct serdev_controller *ctrl, if (ret) return ret; + /* + * Apple machines provide an empty resource template, so on those + * machines just look for immediate children with a "baud" property + * (from the _DSM method) instead. + */ + if (!lookup.controller_handle && x86_apple_machine && + !acpi_dev_get_property(adev, "baud", ACPI_TYPE_BUFFER, NULL)) + acpi_get_parent(adev->handle, &lookup.controller_handle); + /* Make sure controller and ResourceSource handle match */ if (ACPI_HANDLE(ctrl->dev.parent) != lookup.controller_handle) return -ENODEV; diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index 91e9b070d36d..d330da76d6b6 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -25,6 +25,14 @@ #include "8250.h" +#define PCI_DEVICE_ID_ACCES_COM_2S 0x1052 +#define PCI_DEVICE_ID_ACCES_COM_4S 0x105d +#define PCI_DEVICE_ID_ACCES_COM_8S 0x106c +#define PCI_DEVICE_ID_ACCES_COM232_8 0x10a8 +#define PCI_DEVICE_ID_ACCES_COM_2SM 0x10d2 +#define PCI_DEVICE_ID_ACCES_COM_4SM 0x10db +#define PCI_DEVICE_ID_ACCES_COM_8SM 0x10ea + #define PCI_DEVICE_ID_COMMTECH_4224PCI335 0x0002 #define PCI_DEVICE_ID_COMMTECH_4222PCI335 0x0004 #define PCI_DEVICE_ID_COMMTECH_2324PCI335 0x000a @@ -677,6 +685,22 @@ static int __maybe_unused exar_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(exar_pci_pm, exar_suspend, exar_resume); +static const struct exar8250_board acces_com_2x = { + .num_ports = 2, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_4x = { + .num_ports = 4, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_8x = { + .num_ports = 8, + .setup = pci_xr17c154_setup, +}; + + static const struct exar8250_board pbn_fastcom335_2 = { .num_ports = 2, .setup = pci_fastcom335_setup, @@ -745,6 +769,15 @@ static const struct exar8250_board pbn_exar_XR17V8358 = { } static const struct pci_device_id exar_pci_tbl[] = { + EXAR_DEVICE(ACCESSIO, ACCES_COM_2S, acces_com_2x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_4S, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8S, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM232_8, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_2SM, acces_com_2x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_4SM, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8SM, acces_com_8x), + + CONNECT_DEVICE(XR17C152, UART_2_232, pbn_connect), CONNECT_DEVICE(XR17C154, UART_4_232, pbn_connect), CONNECT_DEVICE(XR17C158, UART_8_232, pbn_connect), diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 91e2805e6441..c31b8f3db6bf 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -264,6 +264,7 @@ struct lpuart_port { int rx_dma_rng_buf_len; unsigned int dma_tx_nents; wait_queue_head_t dma_wait; + bool id_allocated; }; struct lpuart_soc_data { @@ -2390,6 +2391,8 @@ static int __init lpuart32_imx_early_console_setup(struct earlycon_device *devic OF_EARLYCON_DECLARE(lpuart, "fsl,vf610-lpuart", lpuart_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1021a-lpuart", lpuart32_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,imx7ulp-lpuart", lpuart32_imx_early_console_setup); +EARLYCON_DECLARE(lpuart, lpuart_early_console_setup); +EARLYCON_DECLARE(lpuart32, lpuart32_early_console_setup); #define LPUART_CONSOLE (&lpuart_console) #define LPUART32_CONSOLE (&lpuart32_console) @@ -2420,19 +2423,6 @@ static int lpuart_probe(struct platform_device *pdev) if (!sport) return -ENOMEM; - ret = of_alias_get_id(np, "serial"); - if (ret < 0) { - ret = ida_simple_get(&fsl_lpuart_ida, 0, UART_NR, GFP_KERNEL); - if (ret < 0) { - dev_err(&pdev->dev, "port line is full, add device failed\n"); - return ret; - } - } - if (ret >= ARRAY_SIZE(lpuart_ports)) { - dev_err(&pdev->dev, "serial%d out of range\n", ret); - return -EINVAL; - } - sport->port.line = ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); sport->port.membase = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(sport->port.membase)) @@ -2477,9 +2467,25 @@ static int lpuart_probe(struct platform_device *pdev) } } + ret = of_alias_get_id(np, "serial"); + if (ret < 0) { + ret = ida_simple_get(&fsl_lpuart_ida, 0, UART_NR, GFP_KERNEL); + if (ret < 0) { + dev_err(&pdev->dev, "port line is full, add device failed\n"); + return ret; + } + sport->id_allocated = true; + } + if (ret >= ARRAY_SIZE(lpuart_ports)) { + dev_err(&pdev->dev, "serial%d out of range\n", ret); + ret = -EINVAL; + goto failed_out_of_range; + } + sport->port.line = ret; + ret = lpuart_enable_clks(sport); if (ret) - return ret; + goto failed_clock_enable; sport->port.uartclk = lpuart_get_baud_clk_rate(sport); lpuart_ports[sport->port.line] = sport; @@ -2529,6 +2535,10 @@ static int lpuart_probe(struct platform_device *pdev) failed_attach_port: failed_irq_request: lpuart_disable_clks(sport); +failed_clock_enable: +failed_out_of_range: + if (sport->id_allocated) + ida_simple_remove(&fsl_lpuart_ida, sport->port.line); return ret; } @@ -2538,7 +2548,8 @@ static int lpuart_remove(struct platform_device *pdev) uart_remove_one_port(&lpuart_reg, &sport->port); - ida_simple_remove(&fsl_lpuart_ida, sport->port.line); + if (sport->id_allocated) + ida_simple_remove(&fsl_lpuart_ida, sport->port.line); lpuart_disable_clks(sport); diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index c12a12556339..4e9a590712cb 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -851,7 +851,7 @@ static int mvebu_uart_probe(struct platform_device *pdev) port->membase = devm_ioremap_resource(&pdev->dev, reg); if (IS_ERR(port->membase)) - return -PTR_ERR(port->membase); + return PTR_ERR(port->membase); mvuart = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_uart), GFP_KERNEL); diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 0c50d7410b31..d7d2e4b844bc 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -181,7 +181,7 @@ int set_selection_user(const struct tiocl_selection __user *sel, return set_selection_kernel(&v, tty); } -int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) +static int __set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) { struct vc_data *vc = vc_cons[fg_console].d; int new_sel_start, new_sel_end, spc; @@ -214,7 +214,6 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) if (ps > pe) /* make sel_start <= sel_end */ swap(ps, pe); - mutex_lock(&sel_lock); if (sel_cons != vc_cons[fg_console].d) { clear_selection(); sel_cons = vc_cons[fg_console].d; @@ -260,10 +259,9 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) break; case TIOCL_SELPOINTER: highlight_pointer(pe); - goto unlock; + return 0; default: - ret = -EINVAL; - goto unlock; + return -EINVAL; } /* remove the pointer */ @@ -285,7 +283,7 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) else if (new_sel_start == sel_start) { if (new_sel_end == sel_end) /* no action required */ - goto unlock; + return 0; else if (new_sel_end > sel_end) /* extend to right */ highlight(sel_end + 2, new_sel_end); else /* contract from right */ @@ -313,8 +311,7 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) if (!bp) { printk(KERN_WARNING "selection: kmalloc() failed\n"); clear_selection(); - ret = -ENOMEM; - goto unlock; + return -ENOMEM; } kfree(sel_buffer); sel_buffer = bp; @@ -339,8 +336,20 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) } } sel_buffer_lth = bp - sel_buffer; -unlock: + + return ret; +} + +int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) +{ + int ret; + + mutex_lock(&sel_lock); + console_lock(); + ret = __set_selection_kernel(v, tty); + console_unlock(); mutex_unlock(&sel_lock); + return ret; } EXPORT_SYMBOL_GPL(set_selection_kernel); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 0cfbb7182b5a..15d27698054a 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -3046,10 +3046,8 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) switch (type) { case TIOCL_SETSEL: - console_lock(); ret = set_selection_user((struct tiocl_selection __user *)(p+1), tty); - console_unlock(); break; case TIOCL_PASTESEL: ret = paste_selection(tty); diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 736b0c6e27fe..3574dbb09366 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -2550,7 +2550,7 @@ found: /* Update ring only if removed request is on pending_req_list list */ if (req_on_hw_ring) { link_trb->buffer = TRB_BUFFER(priv_ep->trb_pool_dma + - (priv_req->start_trb * TRB_SIZE)); + ((priv_req->end_trb + 1) * TRB_SIZE)); link_trb->control = (link_trb->control & TRB_CYCLE) | TRB_TYPE(TRB_LINK) | TRB_CHAIN; @@ -2595,11 +2595,21 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) { struct cdns3_device *priv_dev = priv_ep->cdns3_dev; struct usb_request *request; + struct cdns3_request *priv_req; + struct cdns3_trb *trb = NULL; int ret; int val; trace_cdns3_halt(priv_ep, 0, 0); + request = cdns3_next_request(&priv_ep->pending_req_list); + if (request) { + priv_req = to_cdns3_request(request); + trb = priv_req->trb; + if (trb) + trb->control = trb->control ^ TRB_CYCLE; + } + writel(EP_CMD_CSTALL | EP_CMD_EPRST, &priv_dev->regs->ep_cmd); /* wait for EPRST cleared */ @@ -2610,10 +2620,11 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) priv_ep->flags &= ~(EP_STALLED | EP_STALL_PENDING); - request = cdns3_next_request(&priv_ep->pending_req_list); - - if (request) + if (request) { + if (trb) + trb->control = trb->control ^ TRB_CYCLE; cdns3_rearm_transfer(priv_ep, 1); + } cdns3_start_all_request(priv_dev, priv_ep); return ret; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1d212f82c69b..54cd8ef795ec 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -988,13 +988,17 @@ int usb_remove_device(struct usb_device *udev) { struct usb_hub *hub; struct usb_interface *intf; + int ret; if (!udev->parent) /* Can't remove a root hub */ return -EINVAL; hub = usb_hub_to_struct_hub(udev->parent); intf = to_usb_interface(hub->intfdev); - usb_autopm_get_interface(intf); + ret = usb_autopm_get_interface(intf); + if (ret < 0) + return ret; + set_bit(udev->portnum, hub->removed_bits); hub_port_logical_disconnect(hub, udev->portnum); usb_autopm_put_interface(intf); @@ -1866,7 +1870,7 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) { hub->quirk_disable_autosuspend = 1; - usb_autopm_get_interface(intf); + usb_autopm_get_interface_no_resume(intf); } if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index bbbb35fa639f..235a7c645503 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -213,7 +213,10 @@ static int usb_port_runtime_resume(struct device *dev) if (!port_dev->is_superspeed && peer) pm_runtime_get_sync(&peer->dev); - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, true); msleep(hub_power_on_good_delay(hub)); if (udev && !retval) { @@ -266,7 +269,10 @@ static int usb_port_runtime_suspend(struct device *dev) if (usb_port_block_power_off) return -EBUSY; - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, false); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 2b24336a72e5..2dac3e7cdd97 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -231,6 +231,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Logitech PTZ Pro Camera */ { USB_DEVICE(0x046d, 0x0853), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Screen Share */ + { USB_DEVICE(0x046d, 0x086c), .driver_info = USB_QUIRK_NO_LPM }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 1b7d2f9cb673..1e00bf2d65a2 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1071,7 +1071,14 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep, unsigned int rem = length % maxp; unsigned chain = true; - if (sg_is_last(s)) + /* + * IOMMU driver is coalescing the list of sgs which shares a + * page boundary into one and giving it to USB driver. With + * this the number of sgs mapped is not equal to the number of + * sgs passed. So mark the chain bit to false if it isthe last + * mapped sg. + */ + if (i == remaining - 1) chain = false; if (rem && usb_endpoint_dir_out(dep->endpoint.desc) && !chain) { diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index 10c9e7f6273e..29fe5771c21b 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -424,10 +424,6 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, return err; } - hub->vdd = devm_regulator_get(dev, "vdd"); - if (IS_ERR(hub->vdd)) - return PTR_ERR(hub->vdd); - if (of_property_read_u16_array(np, "vendor-id", &hub->vendor_id, 1)) hub->vendor_id = USB251XB_DEF_VENDOR_ID; @@ -640,6 +636,13 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, } #endif /* CONFIG_OF */ +static void usb251xb_regulator_disable_action(void *data) +{ + struct usb251xb *hub = data; + + regulator_disable(hub->vdd); +} + static int usb251xb_probe(struct usb251xb *hub) { struct device *dev = hub->dev; @@ -676,10 +679,19 @@ static int usb251xb_probe(struct usb251xb *hub) if (err) return err; + hub->vdd = devm_regulator_get(dev, "vdd"); + if (IS_ERR(hub->vdd)) + return PTR_ERR(hub->vdd); + err = regulator_enable(hub->vdd); if (err) return err; + err = devm_add_action_or_reset(dev, + usb251xb_regulator_disable_action, hub); + if (err) + return err; + err = usb251xb_connect(hub); if (err) { dev_err(dev, "Failed to connect hub (%d)\n", err); diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 1cd9b6305b06..1880f3e13f57 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1258,6 +1258,12 @@ UNUSUAL_DEV( 0x090a, 0x1200, 0x0000, 0x9999, USB_SC_RBC, USB_PR_BULK, NULL, 0 ), +UNUSUAL_DEV(0x090c, 0x1000, 0x1100, 0x1100, + "Samsung", + "Flash Drive FIT", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_MAX_SECTORS_64), + /* aeb */ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, "Feiya", diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 403707a3e503..0093bbd0d326 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -456,6 +456,13 @@ config BACKLIGHT_RAVE_SP help Support for backlight control on RAVE SP device. +config BACKLIGHT_LED + tristate "Generic LED based Backlight Driver" + depends on LEDS_CLASS && OF + help + If you have a LCD backlight adjustable by LED class driver, say Y + to enable this driver. + endif # BACKLIGHT_CLASS_DEVICE endmenu diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index 6f8777037c37..0c1a1524627a 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -57,3 +57,4 @@ obj-$(CONFIG_BACKLIGHT_TPS65217) += tps65217_bl.o obj-$(CONFIG_BACKLIGHT_WM831X) += wm831x_bl.o obj-$(CONFIG_BACKLIGHT_ARCXCNN) += arcxcnn_bl.o obj-$(CONFIG_BACKLIGHT_RAVE_SP) += rave-sp-backlight.o +obj-$(CONFIG_BACKLIGHT_LED) += led_bl.o diff --git a/drivers/video/backlight/led_bl.c b/drivers/video/backlight/led_bl.c new file mode 100644 index 000000000000..3f66549997c8 --- /dev/null +++ b/drivers/video/backlight/led_bl.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Texas Instruments Incorporated - http://www.ti.com/ + * Author: Tomi Valkeinen <tomi.valkeinen@ti.com> + * + * Based on pwm_bl.c + */ + +#include <linux/backlight.h> +#include <linux/leds.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +struct led_bl_data { + struct device *dev; + struct backlight_device *bl_dev; + struct led_classdev **leds; + bool enabled; + int nb_leds; + unsigned int *levels; + unsigned int default_brightness; + unsigned int max_brightness; +}; + +static void led_bl_set_brightness(struct led_bl_data *priv, int level) +{ + int i; + int bkl_brightness; + + if (priv->levels) + bkl_brightness = priv->levels[level]; + else + bkl_brightness = level; + + for (i = 0; i < priv->nb_leds; i++) + led_set_brightness(priv->leds[i], bkl_brightness); + + priv->enabled = true; +} + +static void led_bl_power_off(struct led_bl_data *priv) +{ + int i; + + if (!priv->enabled) + return; + + for (i = 0; i < priv->nb_leds; i++) + led_set_brightness(priv->leds[i], LED_OFF); + + priv->enabled = false; +} + +static int led_bl_update_status(struct backlight_device *bl) +{ + struct led_bl_data *priv = bl_get_data(bl); + int brightness = bl->props.brightness; + + if (bl->props.power != FB_BLANK_UNBLANK || + bl->props.fb_blank != FB_BLANK_UNBLANK || + bl->props.state & BL_CORE_FBBLANK) + brightness = 0; + + if (brightness > 0) + led_bl_set_brightness(priv, brightness); + else + led_bl_power_off(priv); + + return 0; +} + +static const struct backlight_ops led_bl_ops = { + .update_status = led_bl_update_status, +}; + +static int led_bl_get_leds(struct device *dev, + struct led_bl_data *priv) +{ + int i, nb_leds, ret; + struct device_node *node = dev->of_node; + struct led_classdev **leds; + unsigned int max_brightness; + unsigned int default_brightness; + + ret = of_count_phandle_with_args(node, "leds", NULL); + if (ret < 0) { + dev_err(dev, "Unable to get led count\n"); + return -EINVAL; + } + + nb_leds = ret; + if (nb_leds < 1) { + dev_err(dev, "At least one LED must be specified!\n"); + return -EINVAL; + } + + leds = devm_kzalloc(dev, sizeof(struct led_classdev *) * nb_leds, + GFP_KERNEL); + if (!leds) + return -ENOMEM; + + for (i = 0; i < nb_leds; i++) { + leds[i] = devm_of_led_get(dev, i); + if (IS_ERR(leds[i])) + return PTR_ERR(leds[i]); + } + + /* check that the LEDs all have the same brightness range */ + max_brightness = leds[0]->max_brightness; + for (i = 1; i < nb_leds; i++) { + if (max_brightness != leds[i]->max_brightness) { + dev_err(dev, "LEDs must have identical ranges\n"); + return -EINVAL; + } + } + + /* get the default brightness from the first LED from the list */ + default_brightness = leds[0]->brightness; + + priv->nb_leds = nb_leds; + priv->leds = leds; + priv->max_brightness = max_brightness; + priv->default_brightness = default_brightness; + + return 0; +} + +static int led_bl_parse_levels(struct device *dev, + struct led_bl_data *priv) +{ + struct device_node *node = dev->of_node; + int num_levels; + u32 value; + int ret; + + if (!node) + return -ENODEV; + + num_levels = of_property_count_u32_elems(node, "brightness-levels"); + if (num_levels > 1) { + int i; + unsigned int db; + u32 *levels = NULL; + + levels = devm_kzalloc(dev, sizeof(u32) * num_levels, + GFP_KERNEL); + if (!levels) + return -ENOMEM; + + ret = of_property_read_u32_array(node, "brightness-levels", + levels, + num_levels); + if (ret < 0) + return ret; + + /* + * Try to map actual LED brightness to backlight brightness + * level + */ + db = priv->default_brightness; + for (i = 0 ; i < num_levels; i++) { + if ((i && db > levels[i-1]) && db <= levels[i]) + break; + } + priv->default_brightness = i; + priv->max_brightness = num_levels - 1; + priv->levels = levels; + } else if (num_levels >= 0) + dev_warn(dev, "Not enough levels defined\n"); + + ret = of_property_read_u32(node, "default-brightness-level", &value); + if (!ret && value <= priv->max_brightness) + priv->default_brightness = value; + else if (!ret && value > priv->max_brightness) + dev_warn(dev, "Invalid default brightness. Ignoring it\n"); + + return 0; +} + +static int led_bl_probe(struct platform_device *pdev) +{ + struct backlight_properties props; + struct led_bl_data *priv; + int ret, i; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + platform_set_drvdata(pdev, priv); + + priv->dev = &pdev->dev; + + ret = led_bl_get_leds(&pdev->dev, priv); + if (ret) + return ret; + + ret = led_bl_parse_levels(&pdev->dev, priv); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to parse DT data\n"); + return ret; + } + + memset(&props, 0, sizeof(struct backlight_properties)); + props.type = BACKLIGHT_RAW; + props.max_brightness = priv->max_brightness; + props.brightness = priv->default_brightness; + props.power = (priv->default_brightness > 0) ? FB_BLANK_POWERDOWN : + FB_BLANK_UNBLANK; + priv->bl_dev = backlight_device_register(dev_name(&pdev->dev), + &pdev->dev, priv, &led_bl_ops, &props); + if (IS_ERR(priv->bl_dev)) { + dev_err(&pdev->dev, "Failed to register backlight\n"); + return PTR_ERR(priv->bl_dev); + } + + for (i = 0; i < priv->nb_leds; i++) + led_sysfs_disable(priv->leds[i]); + + backlight_update_status(priv->bl_dev); + + return 0; +} + +static int led_bl_remove(struct platform_device *pdev) +{ + struct led_bl_data *priv = platform_get_drvdata(pdev); + struct backlight_device *bl = priv->bl_dev; + int i; + + backlight_device_unregister(bl); + + led_bl_power_off(priv); + for (i = 0; i < priv->nb_leds; i++) + led_sysfs_enable(priv->leds[i]); + + return 0; +} + +static const struct of_device_id led_bl_of_match[] = { + { .compatible = "led-backlight" }, + { } +}; + +MODULE_DEVICE_TABLE(of, led_bl_of_match); + +static struct platform_driver led_bl_driver = { + .driver = { + .name = "led-backlight", + .of_match_table = of_match_ptr(led_bl_of_match), + }, + .probe = led_bl_probe, + .remove = led_bl_remove, +}; + +module_platform_driver(led_bl_driver); + +MODULE_DESCRIPTION("LED based Backlight Driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:led-backlight"); diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index de7b8382aba9..998b0de1812f 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1316,6 +1316,9 @@ static int vgacon_font_get(struct vc_data *c, struct console_font *font) static int vgacon_resize(struct vc_data *c, unsigned int width, unsigned int height, unsigned int user) { + if ((width << 1) * height > vga_vram_size) + return -EINVAL; + if (width % 2 || width > screen_info.orig_video_cols || height > (screen_info.orig_video_lines * vga_default_font_height)/ c->vc_font.height) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 7bfe365d9372..341458fd95ca 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -959,8 +959,8 @@ out_iput: iput(vb->vb_dev_info.inode); out_kern_unmount: kern_unmount(balloon_mnt); -#endif out_del_vqs: +#endif vdev->config->del_vqs(vdev); out_free_vb: kfree(vb); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 867c7ebd3f10..58b96baa8d48 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2203,10 +2203,10 @@ void vring_del_virtqueue(struct virtqueue *_vq) vq->split.queue_size_in_bytes, vq->split.vring.desc, vq->split.queue_dma_addr); - - kfree(vq->split.desc_state); } } + if (!vq->packed_ring) + kfree(vq->split.desc_state); list_del(&_vq->list); kfree(vq); } diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index b069349b52f5..3065dd670a18 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -54,6 +54,13 @@ module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +#define WDAT_DEFAULT_TIMEOUT 30 + +static int timeout = WDAT_DEFAULT_TIMEOUT; +module_param(timeout, int, 0); +MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default=" + __MODULE_STRING(WDAT_DEFAULT_TIMEOUT) ")"); + static int wdat_wdt_read(struct wdat_wdt *wdat, const struct wdat_instruction *instr, u32 *value) { @@ -389,7 +396,7 @@ static int wdat_wdt_probe(struct platform_device *pdev) memset(&r, 0, sizeof(r)); r.start = gas->address; - r.end = r.start + gas->access_width - 1; + r.end = r.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { r.flags = IORESOURCE_MEM; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { @@ -438,6 +445,22 @@ static int wdat_wdt_probe(struct platform_device *pdev) platform_set_drvdata(pdev, wdat); + /* + * Set initial timeout so that userspace has time to configure the + * watchdog properly after it has opened the device. In some cases + * the BIOS default is too short and causes immediate reboot. + */ + if (timeout * 1000 < wdat->wdd.min_hw_heartbeat_ms || + timeout * 1000 > wdat->wdd.max_hw_heartbeat_ms) { + dev_warn(dev, "Invalid timeout %d given, using %d\n", + timeout, WDAT_DEFAULT_TIMEOUT); + timeout = WDAT_DEFAULT_TIMEOUT; + } + + ret = wdat_wdt_set_timeout(&wdat->wdd, timeout); + if (ret) + return ret; + watchdog_set_nowayout(&wdat->wdd, nowayout); return devm_watchdog_register_device(dev, &wdat->wdd); } diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index ce1077e32466..7c95516a860f 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -52,7 +52,7 @@ struct xen_pcibk_dev_data { unsigned int ack_intr:1; /* .. and ACK-ing */ unsigned long handled; unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */ - char irq_name[0]; /* xen-pcibk[000:04:00.0] */ + char irq_name[]; /* xen-pcibk[000:04:00.0] */ }; /* Used by XenBus and xen_pcibk_ops.c */ diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index d239fc3c5e3d..eb5151fc8efa 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -313,6 +313,8 @@ static int process_msg(void) req->msg.type = state.msg.type; req->msg.len = state.msg.len; req->body = state.body; + /* write body, then update state */ + virt_wmb(); req->state = xb_req_state_got_reply; req->cb(req); } else @@ -395,6 +397,8 @@ static int process_writes(void) if (state.req->state == xb_req_state_aborted) kfree(state.req); else { + /* write err, then update state */ + virt_wmb(); state.req->state = xb_req_state_got_reply; wake_up(&state.req->wq); } diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 66975da4f3b6..8c4d05b687b7 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -239,9 +239,9 @@ int xenbus_dev_probe(struct device *_dev) goto fail; } - spin_lock(&dev->reclaim_lock); + down(&dev->reclaim_sem); err = drv->probe(dev, id); - spin_unlock(&dev->reclaim_lock); + up(&dev->reclaim_sem); if (err) goto fail_put; @@ -271,9 +271,9 @@ int xenbus_dev_remove(struct device *_dev) free_otherend_watch(dev); if (drv->remove) { - spin_lock(&dev->reclaim_lock); + down(&dev->reclaim_sem); drv->remove(dev); - spin_unlock(&dev->reclaim_lock); + up(&dev->reclaim_sem); } module_put(drv->driver.owner); @@ -473,7 +473,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, goto fail; dev_set_name(&xendev->dev, "%s", devname); - spin_lock_init(&xendev->reclaim_lock); + sema_init(&xendev->reclaim_sem, 1); /* Register with generic device framework. */ err = device_register(&xendev->dev); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 791f6fe01e91..9b2fbe69bccc 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -45,6 +45,7 @@ #include <linux/mm.h> #include <linux/notifier.h> #include <linux/export.h> +#include <linux/semaphore.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -257,10 +258,10 @@ static int backend_reclaim_memory(struct device *dev, void *data) drv = to_xenbus_driver(dev->driver); if (drv && drv->reclaim_memory) { xdev = to_xenbus_device(dev); - if (!spin_trylock(&xdev->reclaim_lock)) + if (down_trylock(&xdev->reclaim_sem)) return 0; drv->reclaim_memory(xdev); - spin_unlock(&xdev->reclaim_lock); + up(&xdev->reclaim_sem); } return 0; } diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index ddc18da61834..3a06eb699f33 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -191,8 +191,11 @@ static bool xenbus_ok(void) static bool test_reply(struct xb_req_data *req) { - if (req->state == xb_req_state_got_reply || !xenbus_ok()) + if (req->state == xb_req_state_got_reply || !xenbus_ok()) { + /* read req->state before all other fields */ + virt_rmb(); return true; + } /* Make sure to reread req->state each time. */ barrier(); @@ -202,7 +205,7 @@ static bool test_reply(struct xb_req_data *req) static void *read_reply(struct xb_req_data *req) { - while (req->state != xb_req_state_got_reply) { + do { wait_event(req->wq, test_reply(req)); if (!xenbus_ok()) @@ -216,7 +219,7 @@ static void *read_reply(struct xb_req_data *req) if (req->err) return ERR_PTR(req->err); - } + } while (req->state != xb_req_state_got_reply); return req->body; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1ccb3f8d528d..27076ebadb36 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7783,6 +7783,7 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, { struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); + u16 csum_size; blk_status_t ret; /* @@ -7802,7 +7803,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, file_offset -= dip->logical_offset; file_offset >>= inode->i_sb->s_blocksize_bits; - io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset); + csum_size = btrfs_super_csum_size(btrfs_sb(inode->i_sb)->super_copy); + io_bio->csum = orig_io_bio->csum + csum_size * file_offset; return 0; } diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 606f26d862dc..cc3ada12848d 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -324,6 +324,8 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) if (full_path == NULL) goto cdda_exit; + convert_delimiter(full_path, '\\'); + cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path); if (!cifs_sb_master_tlink(cifs_sb)) { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 46ebaf3f0824..fa77fe5258b0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -530,6 +530,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) if (tcon->seal) seq_puts(s, ",seal"); + else if (tcon->ses->server->ignore_signature) + seq_puts(s, ",signloosely"); if (tcon->nocase) seq_puts(s, ",nocase"); if (tcon->local_lease) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index de82cfa44b1a..0d956360e984 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1281,6 +1281,7 @@ struct cifs_fid { __u64 volatile_fid; /* volatile file id for smb2 */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ __u8 create_guid[16]; + __u32 access; struct cifs_pending_open *pending_open; unsigned int epoch; #ifdef CONFIG_CIFS_DEBUG2 @@ -1741,6 +1742,12 @@ static inline bool is_retryable_error(int error) return false; } + +/* cifs_get_writable_file() flags */ +#define FIND_WR_ANY 0 +#define FIND_WR_FSUID_ONLY 1 +#define FIND_WR_WITH_DELETE 2 + #define MID_FREE 0 #define MID_REQUEST_ALLOCATED 1 #define MID_REQUEST_SUBMITTED 2 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 89eaaf46d1ca..e5cb681ec138 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -134,11 +134,12 @@ extern bool backup_cred(struct cifs_sb_info *); extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, unsigned int bytes_written); -extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); +extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int); extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, - bool fsuid_only, + int flags, struct cifsFileInfo **ret_file); extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, + int flags, struct cifsFileInfo **ret_file); extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 3c89569e7210..6f6fb3606a5d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1492,6 +1492,7 @@ openRetry: *oplock = rsp->OplockLevel; /* cifs fid stays in le */ oparms->fid->netfid = rsp->Fid; + oparms->fid->access = desired_access; /* Let caller know file was created so we can set the mode. */ /* Do we care about the CreateAction in any other cases? */ @@ -2115,7 +2116,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata) wdata2->tailsz = tailsz; wdata2->bytes = cur_len; - rc = cifs_get_writable_file(CIFS_I(inode), false, + rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &wdata2->cfile); if (!wdata2->cfile) { cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 0ef099442f20..36e7b2fd2190 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -555,7 +555,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (server->ops->close) server->ops->close(xid, tcon, &fid); cifs_del_pending_open(&open); - fput(file); rc = -ENOMEM; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index bc9516ab4b34..3b942ecdd4be 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1958,7 +1958,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, /* Return -EBADF if no handle is found and general rc otherwise */ int -cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, +cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, struct cifsFileInfo **ret_file) { struct cifsFileInfo *open_file, *inv_file = NULL; @@ -1966,7 +1966,8 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, bool any_available = false; int rc = -EBADF; unsigned int refind = 0; - + bool fsuid_only = flags & FIND_WR_FSUID_ONLY; + bool with_delete = flags & FIND_WR_WITH_DELETE; *ret_file = NULL; /* @@ -1998,6 +1999,8 @@ refind_writable: continue; if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) continue; + if (with_delete && !(open_file->fid.access & DELETE)) + continue; if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { if (!open_file->invalidHandle) { /* found a good writable file */ @@ -2045,12 +2048,12 @@ refind_writable: } struct cifsFileInfo * -find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only) +find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) { struct cifsFileInfo *cfile; int rc; - rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile); + rc = cifs_get_writable_file(cifs_inode, flags, &cfile); if (rc) cifs_dbg(FYI, "couldn't find writable handle rc=%d", rc); @@ -2059,6 +2062,7 @@ find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only) int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, + int flags, struct cifsFileInfo **ret_file) { struct list_head *tmp; @@ -2085,7 +2089,7 @@ cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, kfree(full_path); cinode = CIFS_I(d_inode(cfile->dentry)); spin_unlock(&tcon->open_file_lock); - return cifs_get_writable_file(cinode, 0, ret_file); + return cifs_get_writable_file(cinode, flags, ret_file); } spin_unlock(&tcon->open_file_lock); @@ -2162,7 +2166,8 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) if (mapping->host->i_size - offset < (loff_t)to) to = (unsigned)(mapping->host->i_size - offset); - rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file); + rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, + &open_file); if (!rc) { bytes_written = cifs_write(open_file, open_file->pid, write_data, to - from, &offset); @@ -2355,7 +2360,7 @@ retry: if (cfile) cifsFileInfo_put(cfile); - rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile); + rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); /* in case of an error store it to return later */ if (rc) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b5e6635c578e..1e8a4b1579db 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -653,8 +653,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, */ if ((fattr->cf_nlink < 1) && !tcon->unix_ext && !info->DeletePending) { - cifs_dbg(1, "bogus file nlink value %u\n", - fattr->cf_nlink); + cifs_dbg(VFS, "bogus file nlink value %u\n", + fattr->cf_nlink); fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; } } @@ -2073,6 +2073,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) struct inode *inode = d_inode(dentry); struct super_block *sb = dentry->d_sb; char *full_path = NULL; + int count = 0; if (inode == NULL) return -ENOENT; @@ -2094,15 +2095,18 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) full_path, inode, inode->i_count.counter, dentry, cifs_get_time(dentry), jiffies); +again: if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); else rc = cifs_get_inode_info(&inode, full_path, NULL, sb, xid, NULL); - + if (rc == -EAGAIN && count++ < 10) + goto again; out: kfree(full_path); free_xid(xid); + return rc; } @@ -2278,7 +2282,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, * writebehind data than the SMB timeout for the SetPathInfo * request would allow */ - open_file = find_writable_file(cifsInode, true); + open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); if (open_file) { tcon = tlink_tcon(open_file->tlink); server = tcon->ses->server; @@ -2428,7 +2432,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) args->ctime = NO_CHANGE_64; args->device = 0; - open_file = find_writable_file(cifsInode, true); + open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); if (open_file) { u16 nfid = open_file->fid.netfid; u32 npid = open_file->pid; @@ -2531,7 +2535,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) rc = 0; if (attrs->ia_valid & ATTR_MTIME) { - rc = cifs_get_writable_file(cifsInode, false, &wfile); + rc = cifs_get_writable_file(cifsInode, FIND_WR_ANY, &wfile); if (!rc) { tcon = tlink_tcon(wfile->tlink); rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index eb994e313c6a..b130efaf8feb 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -766,7 +766,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_tcon *tcon; /* if the file is already open for write, just use that fileid */ - open_file = find_writable_file(cinode, true); + open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY); if (open_file) { fid.netfid = open_file->fid.netfid; netpid = open_file->pid; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1cf207564ff9..a8c301ae00ed 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -521,7 +521,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, cifs_i = CIFS_I(inode); dosattrs = cifs_i->cifsAttrs | ATTR_READONLY; data.Attributes = cpu_to_le32(dosattrs); - cifs_get_writable_path(tcon, name, &cfile); + cifs_get_writable_path(tcon, name, FIND_WR_ANY, &cfile); tmprc = smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, ACL_NO_MODE, @@ -577,7 +577,7 @@ smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, { struct cifsFileInfo *cfile; - cifs_get_writable_path(tcon, from_name, &cfile); + cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile); return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, DELETE, SMB2_OP_RENAME, cfile); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e47190cae163..c31e84ee3c39 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1364,6 +1364,7 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cfile->fid.persistent_fid = fid->persistent_fid; cfile->fid.volatile_fid = fid->volatile_fid; + cfile->fid.access = fid->access; #ifdef CONFIG_CIFS_DEBUG2 cfile->fid.mid = fid->mid; #endif /* CIFS_DEBUG2 */ @@ -3327,7 +3328,7 @@ static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offs * some servers (Windows2016) will not reflect recent writes in * QUERY_ALLOCATED_RANGES until SMB2_flush is called. */ - wrcfile = find_writable_file(cifsi, false); + wrcfile = find_writable_file(cifsi, FIND_WR_ANY); if (wrcfile) { filemap_write_and_wait(inode->i_mapping); smb2_flush_file(xid, tcon, &wrcfile->fid); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 1234f9ccab03..28c0be5e69b7 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2771,6 +2771,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, atomic_inc(&tcon->num_remote_opens); oparms->fid->persistent_fid = rsp->PersistentFileId; oparms->fid->volatile_fid = rsp->VolatileFileId; + oparms->fid->access = oparms->desired_access; #ifdef CONFIG_CIFS_DEBUG2 oparms->fid->mid = le64_to_cpu(rsp->sync_hdr.MessageId); #endif /* CIFS_DEBUG2 */ diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 65cb09fa6ead..08c9f216a54d 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -539,6 +539,15 @@ int fscrypt_drop_inode(struct inode *inode) mk = ci->ci_master_key->payload.data[0]; /* + * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes + * protected by the key were cleaned by sync_filesystem(). But if + * userspace is still using the files, inodes can be dirtied between + * then and now. We mustn't lose any writes, so skip dirty inodes here. + */ + if (inode->i_state & I_DIRTY_ALL) + return 0; + + /* * Note: since we aren't holding ->mk_secret_sem, the result here can * immediately become outdated. But there's no correctness problem with * unnecessarily evicting. Nor is there a correctness problem with not diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 634b09d18b77..db987b5110a9 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -1090,21 +1090,12 @@ static const struct file_operations fops_regset32 = { * This function creates a file in debugfs with the given name that reports * the names and values of a set of 32-bit registers. If the @mode variable * is so set it can be read from. Writing is not supported. - * - * This function will return a pointer to a dentry if it succeeds. This - * pointer must be passed to the debugfs_remove() function when the file is - * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be - * returned. - * - * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will - * be returned. */ -struct dentry *debugfs_create_regset32(const char *name, umode_t mode, - struct dentry *parent, - struct debugfs_regset32 *regset) +void debugfs_create_regset32(const char *name, umode_t mode, + struct dentry *parent, + struct debugfs_regset32 *regset) { - return debugfs_create_file(name, mode, parent, regset, &fops_regset32); + debugfs_create_file(name, mode, parent, regset, &fops_regset32); } EXPORT_SYMBOL_GPL(debugfs_create_regset32); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ff1b764b0c0e..0c7c4adb664e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2391,7 +2391,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct flex_groups **old_groups, **new_groups; - int size, i; + int size, i, j; if (!sbi->s_log_groups_per_flex) return 0; @@ -2412,8 +2412,8 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) sizeof(struct flex_groups)), GFP_KERNEL); if (!new_groups[i]) { - for (i--; i >= sbi->s_flex_groups_allocated; i--) - kvfree(new_groups[i]); + for (j = sbi->s_flex_groups_allocated; j < i; j++) + kvfree(new_groups[j]); kvfree(new_groups); ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", size); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 594b05ae16c9..71946da84388 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -750,6 +750,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb) return NULL; init_rwsem(&ei->truncate_lock); + /* Zeroing to allow iput() even if partial initialized inode. */ + ei->mmu_private = 0; + ei->i_start = 0; + ei->i_logstart = 0; + ei->i_attrs = 0; + ei->i_pos = 0; + return &ei->vfs_inode; } @@ -1374,16 +1381,6 @@ out: return 0; } -static void fat_dummy_inode_init(struct inode *inode) -{ - /* Initialize this dummy inode to work as no-op. */ - MSDOS_I(inode)->mmu_private = 0; - MSDOS_I(inode)->i_start = 0; - MSDOS_I(inode)->i_logstart = 0; - MSDOS_I(inode)->i_attrs = 0; - MSDOS_I(inode)->i_pos = 0; -} - static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1844,13 +1841,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; - fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); diff --git a/fs/fcntl.c b/fs/fcntl.c index 9bc167562ee8..2e4c0fa2074b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -735,8 +735,9 @@ static void send_sigio_to_task(struct task_struct *p, return; switch (signum) { - kernel_siginfo_t si; - default: + default: { + kernel_siginfo_t si; + /* Queue a rt signal with the appropriate fd as its value. We use SI_SIGIO as the source, not SI_KERNEL, since kernel signals always get @@ -769,6 +770,7 @@ static void send_sigio_to_task(struct task_struct *p, si.si_fd = fd; if (!do_send_sig_info(signum, &si, p, type)) break; + } /* fall-through - fall back on the old plain SIGIO signal */ case 0: do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 2716d56ed0a0..8294851a9dd9 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1248,7 +1248,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (!(file->f_mode & FMODE_OPENED)) return finish_no_open(file, d); dput(d); - return 0; + return excl && (flags & O_CREAT) ? -EEXIST : 0; } BUG_ON(d != NULL); diff --git a/fs/io-wq.c b/fs/io-wq.c index 0a5ab1a8f69a..5cef075c0b37 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -502,7 +502,7 @@ next: if (worker->mm) work->flags |= IO_WQ_WORK_HAS_MM; - if (wq->get_work && !(work->flags & IO_WQ_WORK_INTERNAL)) { + if (wq->get_work) { put_work = work; wq->get_work(work); } @@ -535,42 +535,23 @@ next: } while (1); } -static inline void io_worker_spin_for_work(struct io_wqe *wqe) -{ - int i = 0; - - while (++i < 1000) { - if (io_wqe_run_queue(wqe)) - break; - if (need_resched()) - break; - cpu_relax(); - } -} - static int io_wqe_worker(void *data) { struct io_worker *worker = data; struct io_wqe *wqe = worker->wqe; struct io_wq *wq = wqe->wq; - bool did_work; io_worker_start(wqe, worker); - did_work = false; while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { set_current_state(TASK_INTERRUPTIBLE); loop: - if (did_work) - io_worker_spin_for_work(wqe); spin_lock_irq(&wqe->lock); if (io_wqe_run_queue(wqe)) { __set_current_state(TASK_RUNNING); io_worker_handle_work(worker); - did_work = true; goto loop; } - did_work = false; /* drops the lock on success, retry */ if (__io_worker_idle(wqe, worker)) { __release(&wqe->lock); @@ -766,6 +747,17 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct, return true; } +static void io_run_cancel(struct io_wq_work *work) +{ + do { + struct io_wq_work *old_work = work; + + work->flags |= IO_WQ_WORK_CANCEL; + work->func(&work); + work = (work == old_work) ? NULL : work; + } while (work); +} + static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) { struct io_wqe_acct *acct = io_work_get_acct(wqe, work); @@ -779,8 +771,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) * It's close enough to not be an issue, fork() has the same delay. */ if (unlikely(!io_wq_can_queue(wqe, acct, work))) { - work->flags |= IO_WQ_WORK_CANCEL; - work->func(&work); + io_run_cancel(work); return; } @@ -919,8 +910,7 @@ static enum io_wq_cancel io_wqe_cancel_cb_work(struct io_wqe *wqe, spin_unlock_irqrestore(&wqe->lock, flags); if (found) { - work->flags |= IO_WQ_WORK_CANCEL; - work->func(&work); + io_run_cancel(work); return IO_WQ_CANCEL_OK; } @@ -995,8 +985,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe, spin_unlock_irqrestore(&wqe->lock, flags); if (found) { - work->flags |= IO_WQ_WORK_CANCEL; - work->func(&work); + io_run_cancel(work); return IO_WQ_CANCEL_OK; } @@ -1068,42 +1057,6 @@ enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid) return ret; } -struct io_wq_flush_data { - struct io_wq_work work; - struct completion done; -}; - -static void io_wq_flush_func(struct io_wq_work **workptr) -{ - struct io_wq_work *work = *workptr; - struct io_wq_flush_data *data; - - data = container_of(work, struct io_wq_flush_data, work); - complete(&data->done); -} - -/* - * Doesn't wait for previously queued work to finish. When this completes, - * it just means that previously queued work was started. - */ -void io_wq_flush(struct io_wq *wq) -{ - struct io_wq_flush_data data; - int node; - - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - - if (!node_online(node)) - continue; - init_completion(&data.done); - INIT_IO_WORK(&data.work, io_wq_flush_func); - data.work.flags |= IO_WQ_WORK_INTERNAL; - io_wqe_enqueue(wqe, &data.work); - wait_for_completion(&data.done); - } -} - struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) { int ret = -ENOMEM, node; diff --git a/fs/io-wq.h b/fs/io-wq.h index ccc7d84af57d..e5e15f2c93ec 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -8,7 +8,6 @@ enum { IO_WQ_WORK_HAS_MM = 2, IO_WQ_WORK_HASHED = 4, IO_WQ_WORK_UNBOUND = 32, - IO_WQ_WORK_INTERNAL = 64, IO_WQ_WORK_CB = 128, IO_WQ_WORK_NO_CANCEL = 256, IO_WQ_WORK_CONCURRENT = 512, @@ -79,16 +78,10 @@ struct io_wq_work { pid_t task_pid; }; -#define INIT_IO_WORK(work, _func) \ - do { \ - (work)->list.next = NULL; \ - (work)->func = _func; \ - (work)->files = NULL; \ - (work)->mm = NULL; \ - (work)->creds = NULL; \ - (work)->fs = NULL; \ - (work)->flags = 0; \ - } while (0) \ +#define INIT_IO_WORK(work, _func) \ + do { \ + *(work) = (struct io_wq_work){ .func = _func }; \ + } while (0) \ typedef void (get_work_fn)(struct io_wq_work *); typedef void (put_work_fn)(struct io_wq_work *); @@ -106,7 +99,6 @@ void io_wq_destroy(struct io_wq *wq); void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val); -void io_wq_flush(struct io_wq *wq); void io_wq_cancel_all(struct io_wq *wq); enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork); diff --git a/fs/io_uring.c b/fs/io_uring.c index de650df9ac53..c06082bb039a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -183,19 +183,15 @@ struct fixed_file_table { struct file **files; }; -enum { - FFD_F_ATOMIC, -}; - struct fixed_file_data { struct fixed_file_table *table; struct io_ring_ctx *ctx; struct percpu_ref refs; struct llist_head put_llist; - unsigned long state; struct work_struct ref_work; struct completion done; + struct rcu_head rcu; }; struct io_ring_ctx { @@ -1004,6 +1000,7 @@ static void io_kill_timeout(struct io_kiocb *req) if (ret != -1) { atomic_inc(&req->ctx->cq_timeouts); list_del_init(&req->list); + req->flags |= REQ_F_COMP_LOCKED; io_cqring_fill_event(req, 0); io_put_req(req); } @@ -1483,10 +1480,10 @@ static void io_free_req(struct io_kiocb *req) __attribute__((nonnull)) static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr) { - io_req_find_next(req, nxtptr); - - if (refcount_dec_and_test(&req->refs)) + if (refcount_dec_and_test(&req->refs)) { + io_req_find_next(req, nxtptr); __io_free_req(req); + } } static void io_put_req(struct io_kiocb *req) @@ -1821,6 +1818,10 @@ static void io_iopoll_req_issued(struct io_kiocb *req) list_add(&req->list, &ctx->poll_list); else list_add_tail(&req->list, &ctx->poll_list); + + if ((ctx->flags & IORING_SETUP_SQPOLL) && + wq_has_sleeper(&ctx->sqo_wait)) + wake_up(&ctx->sqo_wait); } static void io_file_put(struct io_submit_state *state) @@ -2071,7 +2072,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, ssize_t ret; ret = import_single_range(rw, buf, sqe_len, *iovec, iter); *iovec = NULL; - return ret; + return ret < 0 ? ret : sqe_len; } if (req->io) { @@ -3002,6 +3003,11 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + sr->msg_flags |= MSG_CMSG_COMPAT; +#endif + if (!io || req->opcode == IORING_OP_SEND) return 0; /* iovec is already imported */ @@ -3154,6 +3160,11 @@ static int io_recvmsg_prep(struct io_kiocb *req, sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + sr->msg_flags |= MSG_CMSG_COMPAT; +#endif + if (!io || req->opcode == IORING_OP_RECV) return 0; /* iovec is already imported */ @@ -4705,11 +4716,21 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_kiocb *linked_timeout; struct io_kiocb *nxt = NULL; + const struct cred *old_creds = NULL; int ret; again: linked_timeout = io_prep_linked_timeout(req); + if (req->work.creds && req->work.creds != current_cred()) { + if (old_creds) + revert_creds(old_creds); + if (old_creds == req->work.creds) + old_creds = NULL; /* restored original creds */ + else + old_creds = override_creds(req->work.creds); + } + ret = io_issue_sqe(req, sqe, &nxt, true); /* @@ -4735,7 +4756,7 @@ punt: err: /* drop submission reference */ - io_put_req(req); + io_put_req_find_next(req, &nxt); if (linked_timeout) { if (!ret) @@ -4759,6 +4780,8 @@ done_req: goto punt; goto again; } + if (old_creds) + revert_creds(old_creds); } static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) @@ -4803,7 +4826,6 @@ static inline void io_queue_link_head(struct io_kiocb *req) static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, struct io_submit_state *state, struct io_kiocb **link) { - const struct cred *old_creds = NULL; struct io_ring_ctx *ctx = req->ctx; unsigned int sqe_flags; int ret, id; @@ -4818,14 +4840,12 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, id = READ_ONCE(sqe->personality); if (id) { - const struct cred *personality_creds; - - personality_creds = idr_find(&ctx->personality_idr, id); - if (unlikely(!personality_creds)) { + req->work.creds = idr_find(&ctx->personality_idr, id); + if (unlikely(!req->work.creds)) { ret = -EINVAL; goto err_req; } - old_creds = override_creds(personality_creds); + get_cred(req->work.creds); } /* same numerical values with corresponding REQ_F_*, safe to copy */ @@ -4837,8 +4857,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, err_req: io_cqring_add_event(req, ret); io_double_put_req(req); - if (old_creds) - revert_creds(old_creds); return false; } @@ -4899,8 +4917,6 @@ err_req: } } - if (old_creds) - revert_creds(old_creds); return true; } @@ -5081,9 +5097,8 @@ static int io_sq_thread(void *data) const struct cred *old_cred; mm_segment_t old_fs; DEFINE_WAIT(wait); - unsigned inflight; unsigned long timeout; - int ret; + int ret = 0; complete(&ctx->completions[1]); @@ -5091,39 +5106,19 @@ static int io_sq_thread(void *data) set_fs(USER_DS); old_cred = override_creds(ctx->creds); - ret = timeout = inflight = 0; + timeout = jiffies + ctx->sq_thread_idle; while (!kthread_should_park()) { unsigned int to_submit; - if (inflight) { + if (!list_empty(&ctx->poll_list)) { unsigned nr_events = 0; - if (ctx->flags & IORING_SETUP_IOPOLL) { - /* - * inflight is the count of the maximum possible - * entries we submitted, but it can be smaller - * if we dropped some of them. If we don't have - * poll entries available, then we know that we - * have nothing left to poll for. Reset the - * inflight count to zero in that case. - */ - mutex_lock(&ctx->uring_lock); - if (!list_empty(&ctx->poll_list)) - io_iopoll_getevents(ctx, &nr_events, 0); - else - inflight = 0; - mutex_unlock(&ctx->uring_lock); - } else { - /* - * Normal IO, just pretend everything completed. - * We don't have to poll completions for that. - */ - nr_events = inflight; - } - - inflight -= nr_events; - if (!inflight) + mutex_lock(&ctx->uring_lock); + if (!list_empty(&ctx->poll_list)) + io_iopoll_getevents(ctx, &nr_events, 0); + else timeout = jiffies + ctx->sq_thread_idle; + mutex_unlock(&ctx->uring_lock); } to_submit = io_sqring_entries(ctx); @@ -5152,7 +5147,7 @@ static int io_sq_thread(void *data) * more IO, we should wait for the application to * reap events and wake us up. */ - if (inflight || + if (!list_empty(&ctx->poll_list) || (!time_after(jiffies, timeout) && ret != -EBUSY && !percpu_ref_is_dying(&ctx->refs))) { cond_resched(); @@ -5162,6 +5157,19 @@ static int io_sq_thread(void *data) prepare_to_wait(&ctx->sqo_wait, &wait, TASK_INTERRUPTIBLE); + /* + * While doing polled IO, before going to sleep, we need + * to check if there are new reqs added to poll_list, it + * is because reqs may have been punted to io worker and + * will be added to poll_list later, hence check the + * poll_list again. + */ + if ((ctx->flags & IORING_SETUP_IOPOLL) && + !list_empty_careful(&ctx->poll_list)) { + finish_wait(&ctx->sqo_wait, &wait); + continue; + } + /* Tell userspace we may need a wakeup call */ ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; /* make sure to read SQ tail after writing flags */ @@ -5189,8 +5197,7 @@ static int io_sq_thread(void *data) mutex_lock(&ctx->uring_lock); ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true); mutex_unlock(&ctx->uring_lock); - if (ret > 0) - inflight += ret; + timeout = jiffies + ctx->sq_thread_idle; } set_fs(old_fs); @@ -5324,6 +5331,26 @@ static void io_file_ref_kill(struct percpu_ref *ref) complete(&data->done); } +static void __io_file_ref_exit_and_free(struct rcu_head *rcu) +{ + struct fixed_file_data *data = container_of(rcu, struct fixed_file_data, + rcu); + percpu_ref_exit(&data->refs); + kfree(data); +} + +static void io_file_ref_exit_and_free(struct rcu_head *rcu) +{ + /* + * We need to order our exit+free call against the potentially + * existing call_rcu() for switching to atomic. One way to do that + * is to have this rcu callback queue the final put and free, as we + * could otherwise have a pre-existing atomic switch complete _after_ + * the free callback we queued. + */ + call_rcu(rcu, __io_file_ref_exit_and_free); +} + static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { struct fixed_file_data *data = ctx->file_data; @@ -5336,14 +5363,13 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) flush_work(&data->ref_work); wait_for_completion(&data->done); io_ring_file_ref_flush(data); - percpu_ref_exit(&data->refs); __io_sqe_files_unregister(ctx); nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE); for (i = 0; i < nr_tables; i++) kfree(data->table[i].files); kfree(data->table); - kfree(data); + call_rcu(&data->rcu, io_file_ref_exit_and_free); ctx->file_data = NULL; ctx->nr_user_files = 0; return 0; @@ -5595,7 +5621,6 @@ static void io_ring_file_ref_switch(struct work_struct *work) data = container_of(work, struct fixed_file_data, ref_work); io_ring_file_ref_flush(data); - percpu_ref_get(&data->refs); percpu_ref_switch_to_percpu(&data->refs); } @@ -5771,8 +5796,13 @@ static void io_atomic_switch(struct percpu_ref *ref) { struct fixed_file_data *data; + /* + * Juggle reference to ensure we hit zero, if needed, so we can + * switch back to percpu mode + */ data = container_of(ref, struct fixed_file_data, refs); - clear_bit(FFD_F_ATOMIC, &data->state); + percpu_ref_put(&data->refs); + percpu_ref_get(&data->refs); } static bool io_queue_file_removal(struct fixed_file_data *data, @@ -5795,11 +5825,7 @@ static bool io_queue_file_removal(struct fixed_file_data *data, llist_add(&pfile->llist, &data->put_llist); if (pfile == &pfile_stack) { - if (!test_and_set_bit(FFD_F_ATOMIC, &data->state)) { - percpu_ref_put(&data->refs); - percpu_ref_switch_to_atomic(&data->refs, - io_atomic_switch); - } + percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch); wait_for_completion(&done); flush_work(&data->ref_work); return false; @@ -5873,10 +5899,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, up->offset++; } - if (ref_switch && !test_and_set_bit(FFD_F_ATOMIC, &data->state)) { - percpu_ref_put(&data->refs); + if (ref_switch) percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch); - } return done ? done : err; } @@ -6334,6 +6358,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_sqe_buffer_unregister(ctx); io_sqe_files_unregister(ctx); io_eventfd_unregister(ctx); + idr_destroy(&ctx->personality_idr); #if defined(CONFIG_UNIX) if (ctx->ring_sock) { @@ -6647,6 +6672,7 @@ out_fput: return submitted ? submitted : ret; } +#ifdef CONFIG_PROC_FS static int io_uring_show_cred(int id, void *p, void *data) { const struct cred *cred = p; @@ -6720,6 +6746,7 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) percpu_ref_put(&ctx->refs); } } +#endif static const struct file_operations io_uring_fops = { .release = io_uring_release, @@ -6731,7 +6758,9 @@ static const struct file_operations io_uring_fops = { #endif .poll = io_uring_poll, .fasync = io_uring_fasync, +#ifdef CONFIG_PROC_FS .show_fdinfo = io_uring_show_fdinfo, +#endif }; static int io_allocate_scq_urings(struct io_ring_ctx *ctx, diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index d181948c0390..3dccc23cf010 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1150,8 +1150,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, /* For undo access buffer must have data copied */ if (undo && !jh->b_committed_data) goto out; - if (jh->b_transaction != handle->h_transaction && - jh->b_next_transaction != handle->h_transaction) + if (READ_ONCE(jh->b_transaction) != handle->h_transaction && + READ_ONCE(jh->b_next_transaction) != handle->h_transaction) goto out; /* * There are two reasons for the barrier here: @@ -2569,8 +2569,8 @@ bool __jbd2_journal_refile_buffer(struct journal_head *jh) * our jh reference and thus __jbd2_journal_file_buffer() must not * take a new one. */ - jh->b_transaction = jh->b_next_transaction; - jh->b_next_transaction = NULL; + WRITE_ONCE(jh->b_transaction, jh->b_next_transaction); + WRITE_ONCE(jh->b_next_transaction, NULL); if (buffer_freed(bh)) jlist = BJ_Forget; else if (jh->b_modified) diff --git a/fs/locks.c b/fs/locks.c index 44b6da032842..426b55d333d5 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -753,20 +753,6 @@ int locks_delete_block(struct file_lock *waiter) { int status = -ENOENT; - /* - * If fl_blocker is NULL, it won't be set again as this thread - * "owns" the lock and is the only one that might try to claim - * the lock. So it is safe to test fl_blocker locklessly. - * Also if fl_blocker is NULL, this waiter is not listed on - * fl_blocked_requests for some lock, so no other request can - * be added to the list of fl_blocked_requests for this - * request. So if fl_blocker is NULL, it is safe to - * locklessly check if fl_blocked_requests is empty. If both - * of these checks succeed, there is no need to take the lock. - */ - if (waiter->fl_blocker == NULL && - list_empty(&waiter->fl_blocked_requests)) - return status; spin_lock(&blocked_lock_lock); if (waiter->fl_blocker) status = 0; diff --git a/fs/nsfs.c b/fs/nsfs.c index b13bfd406820..4f1205725cfe 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -247,6 +247,20 @@ out_invalid: return ERR_PTR(-EINVAL); } +/** + * ns_match() - Returns true if current namespace matches dev/ino provided. + * @ns_common: current ns + * @dev: dev_t from nsfs that will be matched against current nsfs + * @ino: ino_t from nsfs that will be matched against current nsfs + * + * Return: true if dev and ino matches the current nsfs. + */ +bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino) +{ + return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev); +} + + static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) { struct inode *inode = d_inode(dentry); diff --git a/fs/open.c b/fs/open.c index 0788b3715731..b69d6eed67e6 100644 --- a/fs/open.c +++ b/fs/open.c @@ -860,9 +860,6 @@ cleanup_file: * the return value of d_splice_alias(), then the caller needs to perform dput() * on it after finish_open(). * - * On successful return @file is a fully instantiated open file. After this, if - * an error occurs in ->atomic_open(), it needs to clean up with fput(). - * * Returns zero on success or -errno if the open failed. */ int finish_open(struct file *file, struct dentry *dentry, diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig index fb87ad372e29..ef2697b78820 100644 --- a/fs/zonefs/Kconfig +++ b/fs/zonefs/Kconfig @@ -2,6 +2,7 @@ config ZONEFS_FS tristate "zonefs filesystem support" depends on BLOCK depends on BLK_DEV_ZONED + select FS_IOMAP help zonefs is a simple file system which exposes zones of a zoned block device (e.g. host-managed or host-aware SMR disk drives) as files. diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 8bc6ef82d693..69aee3dfb660 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -601,13 +601,13 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ssize_t ret; /* - * For async direct IOs to sequential zone files, ignore IOCB_NOWAIT + * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT * as this can cause write reordering (e.g. the first aio gets EAGAIN * on the inode lock but the second goes through but is now unaligned). */ - if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) - && (iocb->ki_flags & IOCB_NOWAIT)) - iocb->ki_flags &= ~IOCB_NOWAIT; + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) && + (iocb->ki_flags & IOCB_NOWAIT)) + return -EOPNOTSUPP; if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock(inode)) diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index a2583c2bc054..4defed58ea33 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -532,11 +532,12 @@ typedef u64 acpi_integer; strnlen (a, ACPI_NAMESEG_SIZE) == ACPI_NAMESEG_SIZE) /* - * Algorithm to obtain access bit width. + * Algorithm to obtain access bit or byte width. * Can be used with access_width of struct acpi_generic_address and access_size of * struct acpi_resource_generic_register. */ #define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) +#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) /******************************************************************************* * diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h index 4e6dc840b159..9ecb3c1f0f15 100644 --- a/include/crypto/curve25519.h +++ b/include/crypto/curve25519.h @@ -33,7 +33,8 @@ bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], const u8 basepoint[CURVE25519_KEY_SIZE]) { - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) && + (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX))) curve25519_arch(mypublic, secret, basepoint); else curve25519_generic(mypublic, secret, basepoint); @@ -49,7 +50,8 @@ __must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], CURVE25519_KEY_SIZE))) return false; - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) && + (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX))) curve25519_base_arch(pub, secret); else curve25519_generic(pub, secret, curve25519_base_point); diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index bcb39da9adb4..41725d88d27e 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -81,7 +81,7 @@ struct drm_dp_vcpi { * &drm_dp_mst_topology_mgr.base.lock. * @num_sdp_stream_sinks: Number of stream sinks. Protected by * &drm_dp_mst_topology_mgr.base.lock. - * @available_pbn: Available bandwidth for this port. Protected by + * @full_pbn: Max possible bandwidth for this port. Protected by * &drm_dp_mst_topology_mgr.base.lock. * @next: link to next port on this branch device * @aux: i2c aux transport to talk to device connected to this port, protected @@ -126,7 +126,7 @@ struct drm_dp_mst_port { u8 dpcd_rev; u8 num_sdp_streams; u8 num_sdp_stream_sinks; - uint16_t available_pbn; + uint16_t full_pbn; struct list_head next; /** * @mstb: the branch device connected to this port, if there is one. diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index e34a7b7f848a..294b2931c4cc 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -96,6 +96,11 @@ struct drm_gem_shmem_object { * The address are un-mapped when the count reaches zero. */ unsigned int vmap_use_count; + + /** + * @map_cached: map object cached (instead of using writecombine). + */ + bool map_cached; }; #define to_drm_gem_shmem_obj(obj) \ diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 4bbb5f1c8b5b..48ea093ff04c 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -56,6 +56,19 @@ }) /** + * FIELD_MAX() - produce the maximum value representable by a field + * @_mask: shifted mask defining the field's length and position + * + * FIELD_MAX() returns the maximum value that can be held in the field + * specified by @_mask. + */ +#define FIELD_MAX(_mask) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \ + (typeof(_mask))((_mask) >> __bf_shf(_mask)); \ + }) + +/** * FIELD_FIT() - check if value fits in the field * @_mask: shifted mask defining the field's length and position * @_val: value to test against the field @@ -110,6 +123,7 @@ static __always_inline u64 field_mask(u64 field) { return field / field_multiplier(field); } +#define field_max(field) ((typeof(field))field_mask(field)) #define ____MAKE_OP(type,base,to,from) \ static __always_inline __##type type##_encode_bits(base v, base field) \ { \ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 053ea4b51988..f629d40c645c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -524,7 +524,7 @@ struct request_queue { unsigned int sg_reserved_size; int node; #ifdef CONFIG_BLK_DEV_IO_TRACE - struct blk_trace *blk_trace; + struct blk_trace __rcu *blk_trace; struct mutex blk_trace_mutex; #endif /* @@ -1494,7 +1494,6 @@ static inline void put_dev_sector(Sector p) } int kblockd_schedule_work(struct work_struct *work); -int kblockd_schedule_work_on(int cpu, struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7bb2d8de9f30..3b6ff5902edc 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f **/ #define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \ do { \ - struct blk_trace *bt = (q)->blk_trace; \ + struct blk_trace *bt; \ + \ + rcu_read_lock(); \ + bt = rcu_dereference((q)->blk_trace); \ if (unlikely(bt)) \ __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\ + rcu_read_unlock(); \ } while (0) #define blk_add_trace_msg(q, fmt, ...) \ blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__) @@ -61,10 +65,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f static inline bool blk_trace_note_message_enabled(struct request_queue *q) { - struct blk_trace *bt = q->blk_trace; - if (likely(!bt)) - return false; - return bt->act_mask & BLK_TC_NOTIFY; + struct blk_trace *bt; + bool ret; + + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + ret = bt && (bt->act_mask & BLK_TC_NOTIFY); + rcu_read_unlock(); + return ret; } extern void blk_add_driver_data(struct request_queue *q, struct request *rq, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6015a4daf118..bdb981c204fa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -18,6 +18,7 @@ #include <linux/refcount.h> #include <linux/mutex.h> #include <linux/module.h> +#include <linux/kallsyms.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -433,6 +434,16 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) +/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 + * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 + */ +#define BPF_MAX_TRAMP_PROGS 40 + +struct bpf_tramp_progs { + struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS]; + int nr_progs; +}; + /* Different use cases for BPF trampoline: * 1. replace nop at the function entry (kprobe equivalent) * flags = BPF_TRAMP_F_RESTORE_REGS @@ -455,16 +466,25 @@ struct btf_func_model { */ int arch_prepare_bpf_trampoline(void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_prog **fentry_progs, int fentry_cnt, - struct bpf_prog **fexit_progs, int fexit_cnt, + struct bpf_tramp_progs *tprogs, void *orig_call); /* these two functions are called from generated trampoline */ u64 notrace __bpf_prog_enter(void); void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); +struct bpf_ksym { + unsigned long start; + unsigned long end; + char name[KSYM_NAME_LEN]; + struct list_head lnode; + struct latch_tree_node tnode; + bool prog; +}; + enum bpf_tramp_prog_type { BPF_TRAMP_FENTRY, BPF_TRAMP_FEXIT, + BPF_TRAMP_MODIFY_RETURN, BPF_TRAMP_MAX, BPF_TRAMP_REPLACE, /* more than MAX */ }; @@ -493,6 +513,7 @@ struct bpf_trampoline { /* Executable image of trampoline */ void *image; u64 selector; + struct bpf_ksym ksym; }; #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */ @@ -510,9 +531,10 @@ struct bpf_dispatcher { int num_progs; void *image; u32 image_off; + struct bpf_ksym ksym; }; -static __always_inline unsigned int bpf_dispatcher_nopfunc( +static __always_inline unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, unsigned int (*bpf_func)(const void *, @@ -525,17 +547,21 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); -#define BPF_DISPATCHER_INIT(name) { \ - .mutex = __MUTEX_INITIALIZER(name.mutex), \ - .func = &name##func, \ - .progs = {}, \ - .num_progs = 0, \ - .image = NULL, \ - .image_off = 0 \ +#define BPF_DISPATCHER_INIT(_name) { \ + .mutex = __MUTEX_INITIALIZER(_name.mutex), \ + .func = &_name##_func, \ + .progs = {}, \ + .num_progs = 0, \ + .image = NULL, \ + .image_off = 0, \ + .ksym = { \ + .name = #_name, \ + .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ + }, \ } #define DEFINE_BPF_DISPATCHER(name) \ - noinline unsigned int name##func( \ + noinline unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ @@ -543,26 +569,26 @@ void bpf_trampoline_put(struct bpf_trampoline *tr); { \ return bpf_func(ctx, insnsi); \ } \ - EXPORT_SYMBOL(name##func); \ - struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name); + EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ + struct bpf_dispatcher bpf_dispatcher_##name = \ + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ - unsigned int name##func( \ + unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ const struct bpf_insn *)); \ - extern struct bpf_dispatcher name; -#define BPF_DISPATCHER_FUNC(name) name##func -#define BPF_DISPATCHER_PTR(name) (&name) + extern struct bpf_dispatcher bpf_dispatcher_##name; +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func +#define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); -struct bpf_image { - struct latch_tree_node tnode; - unsigned char data[]; -}; -#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image)) -bool is_bpf_image_address(unsigned long address); -void *bpf_image_alloc(void); +/* Called only from JIT-enabled code, so there's no need for stubs. */ +void *bpf_jit_alloc_exec_page(void); +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); +void bpf_image_ksym_del(struct bpf_ksym *ksym); +void bpf_ksym_add(struct bpf_ksym *ksym); +void bpf_ksym_del(struct bpf_ksym *ksym); #else static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { @@ -579,7 +605,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) #define DECLARE_BPF_DISPATCHER(name) -#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nop_func #define BPF_DISPATCHER_PTR(name) NULL static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, @@ -640,8 +666,7 @@ struct bpf_prog_aux { void *jit_data; /* JIT specific data. arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; u32 size_poke_tab; - struct latch_tree_node ksym_tnode; - struct list_head ksym_lnode; + struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; @@ -1056,6 +1081,21 @@ extern int sysctl_unprivileged_bpf_disabled; int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); +struct bpf_link; + +struct bpf_link_ops { + void (*release)(struct bpf_link *link); + void (*dealloc)(struct bpf_link *link); +}; + +void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, + struct bpf_prog *prog); +void bpf_link_inc(struct bpf_link *link); +void bpf_link_put(struct bpf_link *link); +int bpf_link_new_fd(struct bpf_link *link); +struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); +struct bpf_link *bpf_link_get_from_fd(u32 ufd); + int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); @@ -1133,6 +1173,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -1290,6 +1333,13 @@ static inline int bpf_prog_test_run_skb(struct bpf_prog *prog, return -ENOTSUPP; } +static inline int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) @@ -1386,6 +1436,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #if defined(CONFIG_BPF_STREAM_PARSER) int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +void sock_map_unhash(struct sock *sk); +void sock_map_close(struct sock *sk, long timeout); #else static inline int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which) @@ -1398,7 +1450,7 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr, { return -EINVAL; } -#endif +#endif /* CONFIG_BPF_STREAM_PARSER */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); @@ -1459,6 +1511,7 @@ extern const struct bpf_func_proto bpf_strtol_proto; extern const struct bpf_func_proto bpf_strtoul_proto; extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; +extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d7ddebd0cdec..e75d2191226b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -62,6 +62,7 @@ struct css_task_iter { struct list_head *mg_tasks_head; struct list_head *dying_tasks_head; + struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 3d013de64f70..43efcc49f061 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -127,9 +127,9 @@ struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob); -struct dentry *debugfs_create_regset32(const char *name, umode_t mode, - struct dentry *parent, - struct debugfs_regset32 *regset); +void debugfs_create_regset32(const char *name, umode_t mode, + struct dentry *parent, + struct debugfs_regset32 *regset); void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, int nregs, void __iomem *base, char *prefix); @@ -304,11 +304,10 @@ static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode, return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_regset32(const char *name, - umode_t mode, struct dentry *parent, - struct debugfs_regset32 *regset) +static inline void debugfs_create_regset32(const char *name, umode_t mode, + struct dentry *parent, + struct debugfs_regset32 *regset) { - return ERR_PTR(-ENODEV); } static inline void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, diff --git a/include/linux/device.h b/include/linux/device.h index 3e40533d2037..1311f276f533 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -798,6 +798,17 @@ static inline struct device_node *dev_of_node(struct device *dev) return dev->of_node; } +static inline bool dev_has_sync_state(struct device *dev) +{ + if (!dev) + return false; + if (dev->driver && dev->driver->sync_state) + return true; + if (dev->bus && dev->bus->sync_state) + return true; + return false; +} + /* * High level routines for use by the bus drivers */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9efeebde3514..c1d379bf6ee1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -211,6 +211,14 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ) #define ETHTOOL_COALESCE_USE_ADAPTIVE \ (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | ETHTOOL_COALESCE_USE_ADAPTIVE_TX) +#define ETHTOOL_COALESCE_USECS_LOW_HIGH \ + (ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_TX_USECS_LOW | \ + ETHTOOL_COALESCE_RX_USECS_HIGH | ETHTOOL_COALESCE_TX_USECS_HIGH) +#define ETHTOOL_COALESCE_MAX_FRAMES_LOW_HIGH \ + (ETHTOOL_COALESCE_RX_MAX_FRAMES_LOW | \ + ETHTOOL_COALESCE_TX_MAX_FRAMES_LOW | \ + ETHTOOL_COALESCE_RX_MAX_FRAMES_HIGH | \ + ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH) #define ETHTOOL_COALESCE_PKT_RATE_RX_USECS \ (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | \ ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_RX_USECS_HIGH | \ @@ -450,6 +458,8 @@ struct ethtool_ops { struct ethtool_stats *, u64 *); }; +int ethtool_check_ops(const struct ethtool_ops *ops); + struct ethtool_rx_flow_rule { struct flow_rule *rule; unsigned long priv[0]; diff --git a/include/linux/filter.h b/include/linux/filter.h index 43b5e455d2f5..9b5aa5c483cc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -577,7 +577,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); ret; }) #define BPF_PROG_RUN(prog, ctx) \ - __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc) + __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func) /* * Use in preemptible and therefore migratable context to make sure that @@ -596,7 +596,7 @@ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, u32 ret; migrate_disable(); - ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc); + ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func); migrate_enable(); return ret; } @@ -722,7 +722,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return res; } -DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp) +DECLARE_BPF_DISPATCHER(xdp) static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) @@ -733,8 +733,7 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, * already takes rcu_read_lock() when fetching the program, so * it's not necessary here anymore. */ - return __BPF_PROG_RUN(prog, xdp, - BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp)); + return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); } void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); @@ -1084,7 +1083,6 @@ bpf_address_lookup(unsigned long addr, unsigned long *size, void bpf_prog_kallsyms_add(struct bpf_prog *fp); void bpf_prog_kallsyms_del(struct bpf_prog *fp); -void bpf_get_prog_name(const struct bpf_prog *prog, char *sym); #else /* CONFIG_BPF_JIT */ @@ -1153,11 +1151,6 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) { } -static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) -{ - sym[0] = '\0'; -} - #endif /* CONFIG_BPF_JIT */ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index e4ba25d63913..ce9ed1c0602f 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -2,15 +2,10 @@ #ifndef _INET_DIAG_H_ #define _INET_DIAG_H_ 1 +#include <net/netlink.h> #include <uapi/linux/inet_diag.h> -struct net; -struct sock; struct inet_hashinfo; -struct nlattr; -struct nlmsghdr; -struct sk_buff; -struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, @@ -67,6 +62,17 @@ int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); +static inline size_t inet_diag_msg_attrs_size(void) +{ + return nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ +#if IS_ENABLED(CONFIG_IPV6) + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(1) /* INET_DIAG_SKV6ONLY */ +#endif + + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4); /* INET_DIAG_CLASS_ID */ +} int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, struct user_namespace *user_ns, bool net_admin); diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 35e15dfd4155..cb20c733b15a 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -14,36 +14,41 @@ #include <linux/io.h> /** - * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs - * @op: accessor function (takes @addr as its only argument) - * @addr: Address to poll + * read_poll_timeout - Periodically poll an address until a condition is + * met or a timeout occurs + * @op: accessor function (takes @args as its arguments) * @val: Variable to read the value into * @cond: Break condition (usually involving @val) * @sleep_us: Maximum time to sleep between reads in us (0 * tight-loops). Should be less than ~20ms since usleep_range * is used (see Documentation/timers/timers-howto.rst). * @timeout_us: Timeout in us, 0 means never timeout + * @sleep_before_read: if it is true, sleep @sleep_us before read. + * @args: arguments for @op poll * * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @addr is stored in @val. Must not + * case, the last read value at @args is stored in @val. Must not * be called from atomic context if sleep_us or timeout_us are used. * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. */ -#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ +#define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \ + sleep_before_read, args...) \ ({ \ u64 __timeout_us = (timeout_us); \ unsigned long __sleep_us = (sleep_us); \ ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ might_sleep_if((__sleep_us) != 0); \ + if (sleep_before_read && __sleep_us) \ + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ for (;;) { \ - (val) = op(addr); \ + (val) = op(args); \ if (cond) \ break; \ if (__timeout_us && \ ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = op(addr); \ + (val) = op(args); \ break; \ } \ if (__sleep_us) \ @@ -53,6 +58,27 @@ }) /** + * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs + * @op: accessor function (takes @addr as its only argument) + * @addr: Address to poll + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @sleep_us: Maximum time to sleep between reads in us (0 + * tight-loops). Should be less than ~20ms since usleep_range + * is used (see Documentation/timers/timers-howto.rst). + * @timeout_us: Timeout in us, 0 means never timeout + * + * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + */ +#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ + read_poll_timeout(op, val, cond, sleep_us, timeout_us, false, addr) + +/** * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs * @op: accessor function (takes @addr as its only argument) * @addr: Address to poll diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7944ad6ac10b..bcb9b2ac0791 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1344,7 +1344,7 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ struct kvm_vcpu *kvm_get_running_vcpu(void); -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS bool kvm_arch_has_irq_bypass(void); diff --git a/include/linux/mdio.h b/include/linux/mdio.h index a7604248777b..917e4bb2ed71 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -316,11 +316,15 @@ static inline void mii_10gbt_stat_mod_linkmode_lpa_t(unsigned long *advertising, int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, + u16 mask, u16 set); int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, + u16 set); int mdiobus_register_device(struct mdio_device *mdiodev); int mdiobus_unregister_device(struct mdio_device *mdiodev); diff --git a/include/linux/mii.h b/include/linux/mii.h index 18c6208f56fc..219b93cad1dd 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -355,24 +355,6 @@ static inline u32 mii_adv_to_ethtool_adv_x(u32 adv) } /** - * mii_lpa_to_ethtool_lpa_x - * @adv: value of the MII_LPA register - * - * A small helper function that translates MII_LPA - * bits, when in 1000Base-X mode, to ethtool - * LP advertisement settings. - */ -static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa) -{ - u32 result = 0; - - if (lpa & LPA_LPACK) - result |= ADVERTISED_Autoneg; - - return result | mii_adv_to_ethtool_adv_x(lpa); -} - -/** * mii_lpa_mod_linkmode_adv_sgmii * @lp_advertising: pointer to destination link mode. * @lpa: value of the MII_LPA register @@ -536,6 +518,45 @@ static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising) } /** + * mii_lpa_mod_linkmode_x - decode the link partner's config_reg to linkmodes + * @linkmodes: link modes array + * @lpa: config_reg word from link partner + * @fd_bit: link mode for 1000XFULL bit + */ +static inline void mii_lpa_mod_linkmode_x(unsigned long *linkmodes, u16 lpa, + int fd_bit) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, linkmodes, + lpa & LPA_LPACK); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, linkmodes, + lpa & LPA_1000XPAUSE); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, linkmodes, + lpa & LPA_1000XPAUSE_ASYM); + linkmode_mod_bit(fd_bit, linkmodes, + lpa & LPA_1000XFULL); +} + +/** + * linkmode_adv_to_mii_adv_x - encode a linkmode to config_reg + * @linkmodes: linkmodes + * @fd_bit: full duplex bit + */ +static inline u16 linkmode_adv_to_mii_adv_x(const unsigned long *linkmodes, + int fd_bit) +{ + u16 adv = 0; + + if (linkmode_test_bit(fd_bit, linkmodes)) + adv |= ADVERTISE_1000XFULL; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, linkmodes)) + adv |= ADVERTISE_1000XPAUSE; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, linkmodes)) + adv |= ADVERTISE_1000XPSE_ASYM; + + return adv; +} + +/** * mii_advertise_flowctrl - get flow control advertisement flags * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) */ diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 98e667b176ef..c16827eeba9c 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -70,8 +70,30 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); +bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw); bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); -u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + +/* Reg C0 usage: + * Reg C0 = < ESW_VHCA_ID_BITS(8) | ESW_VPORT BITS(8) | ESW_CHAIN_TAG(16) > + * + * Highest 8 bits of the reg c0 is the vhca_id, next 8 bits is vport_num, + * the rest (lowest 16 bits) is left for tc chain tag restoration. + * VHCA_ID + VPORT comprise the SOURCE_PORT matching. + */ +#define ESW_VHCA_ID_BITS 8 +#define ESW_VPORT_BITS 8 +#define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS) +#define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS) +#define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) +#define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\ + 0) + +static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) +{ + return GENMASK(31, 32 - ESW_SOURCE_PORT_METADATA_BITS); +} + +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num); u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); #else /* CONFIG_MLX5_ESWITCH */ @@ -88,17 +110,29 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) } static inline bool +mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw) +{ + return false; +}; + +static inline bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) { return false; }; static inline u32 -mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, +mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, int vport_num) { return 0; }; + +static inline u32 +mlx5_eswitch_get_vport_metadata_mask(void) +{ + return 0; +} #endif /* CONFIG_MLX5_ESWITCH */ #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2bd920965bd3..cc55cee3b53c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -416,7 +416,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 termination_table[0x1]; u8 reformat_and_fwd_to_table[0x1]; u8 reserved_at_1a[0x6]; - u8 reserved_at_20[0x2]; + u8 termination_table_raw_traffic[0x1]; + u8 reserved_at_21[0x1]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; u8 max_modify_header_actions[0x8]; diff --git a/include/linux/mm.h b/include/linux/mm.h index 52269e56c514..c54fb96cb1e6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2715,6 +2715,10 @@ static inline bool debug_pagealloc_enabled_static(void) #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP) extern void __kernel_map_pages(struct page *page, int numpages, int enable); +/* + * When called in DEBUG_PAGEALLOC context, the call should most likely be + * guarded by debug_pagealloc_enabled() or debug_pagealloc_enabled_static() + */ static inline void kernel_map_pages(struct page *page, int numpages, int enable) { diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 5448c8b443db..ab192720e2d6 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -98,7 +98,7 @@ struct ip_set_counter { struct ip_set_comment_rcu { struct rcu_head rcu; - char str[0]; + char str[]; }; struct ip_set_comment { diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1b261c51b3a3..5da88451853b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -264,7 +264,7 @@ struct xt_table_info { unsigned int stacksize; void ***jumpstack; - unsigned char entries[0] __aligned(8); + unsigned char entries[] __aligned(8); }; int xt_register_target(struct xt_target *target); @@ -464,7 +464,7 @@ struct compat_xt_entry_match { } kernel; u_int16_t match_size; } u; - unsigned char data[0]; + unsigned char data[]; }; struct compat_xt_entry_target { @@ -480,7 +480,7 @@ struct compat_xt_entry_target { } kernel; u_int16_t target_size; } u; - unsigned char data[0]; + unsigned char data[]; }; /* FIXME: this works only on 32 bit tasks @@ -494,7 +494,7 @@ struct compat_xt_counters { struct compat_xt_counters_info { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t num_counters; - struct compat_xt_counters counters[0]; + struct compat_xt_counters counters[]; }; struct _compat_xt_align { diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index e98028f00e47..7d3537c40ec9 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -67,7 +67,7 @@ struct compat_arpt_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char elems[]; }; static inline struct xt_entry_target * diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 162f59d0d17a..2f5c4e6ecd8a 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -85,7 +85,7 @@ struct ebt_table_info { /* room to maintain the stack used for jumping from and into udc */ struct ebt_chainstack **chainstack; char *entries; - struct ebt_counter counters[0] ____cacheline_aligned; + struct ebt_counter counters[] ____cacheline_aligned; }; struct ebt_table { diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index e9e1ed74cdf1..b394bd4f68a3 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -76,7 +76,7 @@ struct compat_ipt_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char elems[]; }; /* Helper functions */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 78ab959c4575..8225f7821a29 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -43,7 +43,7 @@ struct compat_ip6t_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char elems[]; }; static inline struct xt_entry_target * diff --git a/include/linux/phy.h b/include/linux/phy.h index e72dbd0d2d6a..2432ca463ddc 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -23,6 +23,8 @@ #include <linux/workqueue.h> #include <linux/mod_devicetable.h> #include <linux/u64_stats_sync.h> +#include <linux/irqreturn.h> +#include <linux/iopoll.h> #include <linux/atomic.h> @@ -94,6 +96,7 @@ typedef enum { PHY_INTERFACE_MODE_RTBI, PHY_INTERFACE_MODE_SMII, PHY_INTERFACE_MODE_XGMII, + PHY_INTERFACE_MODE_XLGMII, PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, PHY_INTERFACE_MODE_TRGMII, @@ -165,6 +168,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "smii"; case PHY_INTERFACE_MODE_XGMII: return "xgmii"; + case PHY_INTERFACE_MODE_XLGMII: + return "xlgmii"; case PHY_INTERFACE_MODE_MOCA: return "moca"; case PHY_INTERFACE_MODE_QSGMII: @@ -358,8 +363,10 @@ struct macsec_ops; * is_gigabit_capable: Set to true if PHY supports 1000Mbps * has_fixups: Set to true if this phy has fixups/quirks. * suspended: Set to true if this phy has been suspended successfully. + * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. * loopback_enabled: Set true if this phy has been loopbacked successfully. + * downshifted_rate: Set true if link speed has been downshifted. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. * irq: IRQ number of the PHY's interrupt (-1 if none) @@ -397,8 +404,10 @@ struct phy_device { unsigned is_gigabit_capable:1; unsigned has_fixups:1; unsigned suspended:1; + unsigned suspended_by_mdio_bus:1; unsigned sysfs_links:1; unsigned loopback_enabled:1; + unsigned downshifted_rate:1; unsigned autoneg:1; /* The most recently read link state */ @@ -558,11 +567,12 @@ struct phy_driver { /* * Checks if the PHY generated an interrupt. * For multi-PHY devices with shared PHY interrupt pin + * Set interrupt bits have to be cleared. */ int (*did_interrupt)(struct phy_device *phydev); /* Override default interrupt handling */ - int (*handle_interrupt)(struct phy_device *phydev); + irqreturn_t (*handle_interrupt)(struct phy_device *phydev); /* Clears up any memory if needed */ void (*remove)(struct phy_device *phydev); @@ -691,6 +701,7 @@ static inline bool phy_is_started(struct phy_device *phydev) void phy_resolve_aneg_pause(struct phy_device *phydev); void phy_resolve_aneg_linkmode(struct phy_device *phydev); +void phy_check_downshift(struct phy_device *phydev); /** * phy_read - Convenience function for reading a given PHY register @@ -706,6 +717,19 @@ static inline int phy_read(struct phy_device *phydev, u32 regnum) return mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, regnum); } +#define phy_read_poll_timeout(phydev, regnum, val, cond, sleep_us, \ + timeout_us, sleep_before_read) \ +({ \ + int __ret = read_poll_timeout(phy_read, val, (cond) || val < 0, \ + sleep_us, timeout_us, sleep_before_read, phydev, regnum); \ + if (val < 0) \ + __ret = val; \ + if (__ret) \ + phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \ + __ret; \ +}) + + /** * __phy_read - convenience function for reading a given PHY register * @phydev: the phy_device struct @@ -748,6 +772,25 @@ static inline int __phy_write(struct phy_device *phydev, u32 regnum, u16 val) } /** + * __phy_modify_changed() - Convenience function for modifying a PHY register + * @phydev: a pointer to a &struct phy_device + * @regnum: register number + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + * + * Unlocked helper function which allows a PHY register to be modified as + * new register value = (old register value & ~mask) | set + * + * Returns negative errno, 0 if there was no change, and 1 in case of change + */ +static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum, + u16 mask, u16 set) +{ + return __mdiobus_modify_changed(phydev->mdio.bus, phydev->mdio.addr, + regnum, mask, set); +} + +/** * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. * @phydev: The phy_device struct @@ -758,6 +801,19 @@ static inline int __phy_write(struct phy_device *phydev, u32 regnum, u16 val) */ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); +#define phy_read_mmd_poll_timeout(phydev, devaddr, regnum, val, cond, \ + sleep_us, timeout_us, sleep_before_read) \ +({ \ + int __ret = read_poll_timeout(phy_read_mmd, val, (cond) || val < 0, \ + sleep_us, timeout_us, sleep_before_read, \ + phydev, devaddr, regnum); \ + if (val < 0) \ + __ret = val; \ + if (__ret) \ + phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \ + __ret; \ +}) + /** * __phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 2180eb1aa254..8fa6df3b881b 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -317,4 +317,12 @@ int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); void phylink_set_port_modes(unsigned long *bits); void phylink_helper_basex_speed(struct phylink_link_state *state); +void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, + struct phylink_link_state *state); +int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, + const struct phylink_link_state *state); +void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs); + +void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs, + struct phylink_link_state *state); #endif diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h index 0bf9fddb8306..3b400b1919a9 100644 --- a/include/linux/platform_data/spi-omap2-mcspi.h +++ b/include/linux/platform_data/spi-omap2-mcspi.h @@ -11,6 +11,7 @@ struct omap2_mcspi_platform_config { unsigned short num_cs; unsigned int regs_offset; unsigned int pin_dir:1; + size_t max_xfer_len; }; struct omap2_mcspi_device_config { diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 276a03c24691..041bfa412aa0 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -24,7 +24,7 @@ struct platform_device { int id; bool id_auto; struct device dev; - u64 dma_mask; + u64 platform_dma_mask; u32 num_resources; struct resource *resource; diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 4626b1ac3b6c..adff08bfecf9 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -85,6 +85,8 @@ typedef struct ns_common *ns_get_path_helper_t(void *); extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, void *private_data); +extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino); + extern int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops); extern void nsfs_init(void); diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index beb9a9da1699..70ebef866cc8 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -972,9 +972,9 @@ static inline int rhashtable_lookup_insert_key( /** * rhashtable_lookup_get_insert_key - lookup and insert object into hash table * @ht: hash table + * @key: key * @obj: pointer to hash head inside object * @params: hash table parameters - * @data: pointer to element data already in hashes * * Just like rhashtable_lookup_insert_key(), but this function returns the * object if it exists, NULL if it does not and the insertion was successful, diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 112765bd146d..8a709f63c5e5 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -323,14 +323,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link) } struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock); -#if defined(CONFIG_BPF_STREAM_PARSER) -void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link); -#else -static inline void sk_psock_unlink(struct sock *sk, - struct sk_psock_link *link) -{ -} -#endif void __sk_psock_purge_ingress_msg(struct sk_psock *psock); @@ -347,11 +339,23 @@ static inline void sk_psock_update_proto(struct sock *sk, struct sk_psock *psock, struct proto *ops) { - psock->saved_unhash = sk->sk_prot->unhash; - psock->saved_close = sk->sk_prot->close; - psock->saved_write_space = sk->sk_write_space; + /* Initialize saved callbacks and original proto only once, since this + * function may be called multiple times for a psock, e.g. when + * psock->progs.msg_parser is updated. + * + * Since we've not installed the new proto, psock is not yet in use and + * we can initialize it without synchronization. + */ + if (!psock->sk_proto) { + struct proto *orig = READ_ONCE(sk->sk_prot); + + psock->saved_unhash = orig->unhash; + psock->saved_close = orig->close; + psock->saved_write_space = sk->sk_write_space; + + psock->sk_proto = orig; + } - psock->sk_proto = sk->sk_prot; /* Pairs with lockless read in sk_clone_lock() */ WRITE_ONCE(sk->sk_prot, ops); } @@ -360,7 +364,13 @@ static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; - tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); + if (inet_csk_has_ulp(sk)) { + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); + } else { + sk->sk_write_space = psock->saved_write_space; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, psock->sk_proto); + } } static inline void sk_psock_set_state(struct sk_psock *psock, @@ -381,26 +391,6 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock, return test_bit(bit, &psock->state); } -static inline struct sk_psock *sk_psock_get_checked(struct sock *sk) -{ - struct sk_psock *psock; - - rcu_read_lock(); - psock = sk_psock(sk); - if (psock) { - if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { - psock = ERR_PTR(-EBUSY); - goto out; - } - - if (!refcount_inc_not_zero(&psock->refcnt)) - psock = ERR_PTR(-EBUSY); - } -out: - rcu_read_unlock(); - return psock; -} - static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9e531ec76274..4beb51009b62 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -562,8 +562,8 @@ static inline int sysfs_groups_change_owner(struct kobject *kobj, } static inline int sysfs_group_change_owner(struct kobject *kobj, - const struct attribute_group **groups, - kuid_t kuid, kgid_t kgid) + const struct attribute_group *groups, + kuid_t kuid, kgid_t kgid) { return 0; } diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4261d1c6e87b..e48554e6526c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -487,6 +487,19 @@ extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task); * * We queue the work to the CPU on which it was submitted, but if the CPU dies * it can be processed by another CPU. + * + * Memory-ordering properties: If it returns %true, guarantees that all stores + * preceding the call to queue_work() in the program order will be visible from + * the CPU which will execute @work by the time such work executes, e.g., + * + * { x is initially 0 } + * + * CPU0 CPU1 + * + * WRITE_ONCE(x, 1); [ @work is being executed ] + * r0 = queue_work(wq, work); r1 = READ_ONCE(x); + * + * Forbids: r0 == true && r1 == 0 */ static inline bool queue_work(struct workqueue_struct *wq, struct work_struct *work) @@ -546,6 +559,9 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work) * This puts a job in the kernel-global workqueue if it was not already * queued and leaves it in the same position on the kernel-global * workqueue otherwise. + * + * Shares the same memory-ordering properties of queue_work(), cf. the + * DocBook header of queue_work(). */ static inline bool schedule_work(struct work_struct *work) { diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index a71378007e61..c80539be1542 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -138,7 +138,7 @@ struct lowpan_dev { struct lowpan_iphc_ctx_table ctx; /* must be last */ - u8 priv[0] __aligned(sizeof(void *)); + u8 priv[] __aligned(sizeof(void *)); }; struct lowpan_802154_neigh { diff --git a/include/net/act_api.h b/include/net/act_api.h index 41337c7fc728..ecdec9d6ead0 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -41,7 +41,7 @@ struct tc_action { struct tc_cookie __rcu *act_cookie; struct tcf_chain __rcu *goto_chain; u32 tcfa_flags; - u8 hw_stats_type; + u8 hw_stats; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt @@ -53,8 +53,8 @@ struct tc_action { #define tcf_rate_est common.tcfa_rate_est #define tcf_lock common.tcfa_lock -#define TCA_ACT_HW_STATS_TYPE_ANY (TCA_ACT_HW_STATS_TYPE_IMMEDIATE | \ - TCA_ACT_HW_STATS_TYPE_DELAYED) +#define TCA_ACT_HW_STATS_ANY (TCA_ACT_HW_STATS_IMMEDIATE | \ + TCA_ACT_HW_STATS_DELAYED) /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index e42bb8e03c09..1576353a2773 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -121,6 +121,23 @@ struct bt_voice { #define BT_SNDMTU 12 #define BT_RCVMTU 13 +#define BT_PHY 14 + +#define BT_PHY_BR_1M_1SLOT 0x00000001 +#define BT_PHY_BR_1M_3SLOT 0x00000002 +#define BT_PHY_BR_1M_5SLOT 0x00000004 +#define BT_PHY_EDR_2M_1SLOT 0x00000008 +#define BT_PHY_EDR_2M_3SLOT 0x00000010 +#define BT_PHY_EDR_2M_5SLOT 0x00000020 +#define BT_PHY_EDR_3M_1SLOT 0x00000040 +#define BT_PHY_EDR_3M_3SLOT 0x00000080 +#define BT_PHY_EDR_3M_5SLOT 0x00000100 +#define BT_PHY_LE_1M_TX 0x00000200 +#define BT_PHY_LE_1M_RX 0x00000400 +#define BT_PHY_LE_2M_TX 0x00000800 +#define BT_PHY_LE_2M_RX 0x00001000 +#define BT_PHY_LE_CODED_TX 0x00002000 +#define BT_PHY_LE_CODED_RX 0x00004000 __printf(1, 2) void bt_info(const char *fmt, ...); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 6293bdd7d862..5f60e135aeb6 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -115,7 +115,7 @@ enum { * wrongly configured local features that will require forcing * them to enable this mode. Getting RSSI information with the * inquiry responses is preferred since it allows for a better - * user expierence. + * user experience. * * This quirk must be set before hci_register_dev is called. */ @@ -142,7 +142,7 @@ enum { /* When this quirk is set, an external configuration step * is required and will be indicated with the controller - * configuation. + * configuration. * * This quirk can be set before hci_register_dev is called or * during the hdev->setup vendor callback. @@ -205,6 +205,15 @@ enum { * */ HCI_QUIRK_NON_PERSISTENT_SETUP, + + /* When this quirk is set, wide band speech is supported by + * the driver since no reliable mechanism exist to report + * this from the hardware, a driver flag is use to convey + * this support + * + * This quirk must be set before hci_register_dev is called. + */ + HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, }; /* HCI device flags */ @@ -277,6 +286,7 @@ enum { HCI_FAST_CONNECTABLE, HCI_BREDR_ENABLED, HCI_LE_SCAN_INTERRUPTED, + HCI_WIDEBAND_SPEECH_ENABLED, HCI_DUT_MODE, HCI_VENDOR_DIAG, @@ -932,10 +942,14 @@ struct hci_cp_sniff_subrate { #define HCI_OP_RESET 0x0c03 #define HCI_OP_SET_EVENT_FLT 0x0c05 -struct hci_cp_set_event_flt { - __u8 flt_type; - __u8 cond_type; - __u8 condition[0]; +#define HCI_SET_EVENT_FLT_SIZE 9 +struct hci_cp_set_event_filter { + __u8 flt_type; + __u8 cond_type; + struct { + bdaddr_t bdaddr; + __u8 auto_accept; + } __packed addr_conn_flt; } __packed; /* Filter types */ @@ -949,8 +963,9 @@ struct hci_cp_set_event_flt { #define HCI_CONN_SETUP_ALLOW_BDADDR 0x02 /* CONN_SETUP Conditions */ -#define HCI_CONN_SETUP_AUTO_OFF 0x01 -#define HCI_CONN_SETUP_AUTO_ON 0x02 +#define HCI_CONN_SETUP_AUTO_OFF 0x01 +#define HCI_CONN_SETUP_AUTO_ON 0x02 +#define HCI_CONN_SETUP_AUTO_ON_WITH_RS 0x03 #define HCI_OP_READ_STORED_LINK_KEY 0x0c0d struct hci_cp_read_stored_link_key { @@ -1086,6 +1101,19 @@ struct hci_rp_read_inq_rsp_tx_power { __s8 tx_power; } __packed; +#define HCI_OP_READ_DEF_ERR_DATA_REPORTING 0x0c5a + #define ERR_DATA_REPORTING_DISABLED 0x00 + #define ERR_DATA_REPORTING_ENABLED 0x01 +struct hci_rp_read_def_err_data_reporting { + __u8 status; + __u8 err_data_reporting; +} __packed; + +#define HCI_OP_WRITE_DEF_ERR_DATA_REPORTING 0x0c5b +struct hci_cp_write_def_err_data_reporting { + __u8 err_data_reporting; +} __packed; + #define HCI_OP_SET_EVENT_MASK_PAGE_2 0x0c63 #define HCI_OP_READ_LOCATION_DATA 0x0c64 @@ -1335,7 +1363,7 @@ struct hci_rp_read_local_amp_assoc { __u8 status; __u8 phy_handle; __le16 rem_len; - __u8 frag[0]; + __u8 frag[]; } __packed; #define HCI_OP_WRITE_REMOTE_AMP_ASSOC 0x140b @@ -1343,7 +1371,7 @@ struct hci_cp_write_remote_amp_assoc { __u8 phy_handle; __le16 len_so_far; __le16 rem_len; - __u8 frag[0]; + __u8 frag[]; } __packed; struct hci_rp_write_remote_amp_assoc { __u8 status; @@ -1613,7 +1641,7 @@ struct hci_cp_le_set_ext_scan_params { __u8 own_addr_type; __u8 filter_policy; __u8 scanning_phys; - __u8 data[0]; + __u8 data[]; } __packed; #define LE_SCAN_PHY_1M 0x01 @@ -1641,7 +1669,7 @@ struct hci_cp_le_ext_create_conn { __u8 peer_addr_type; bdaddr_t peer_addr; __u8 phys; - __u8 data[0]; + __u8 data[]; } __packed; struct hci_cp_le_ext_conn_param { @@ -1693,7 +1721,7 @@ struct hci_rp_le_set_ext_adv_params { struct hci_cp_le_set_ext_adv_enable { __u8 enable; __u8 num_of_sets; - __u8 data[0]; + __u8 data[]; } __packed; struct hci_cp_ext_adv_set { @@ -1724,6 +1752,8 @@ struct hci_cp_le_set_ext_scan_rsp_data { #define LE_SET_ADV_DATA_NO_FRAG 0x01 +#define HCI_OP_LE_REMOVE_ADV_SET 0x203c + #define HCI_OP_LE_CLEAR_ADV_SETS 0x203d #define HCI_OP_LE_SET_ADV_SET_RAND_ADDR 0x2035 @@ -1775,14 +1805,14 @@ struct hci_cp_le_set_cig_params { __le16 m_latency; __le16 s_latency; __u8 num_cis; - struct hci_cis_params cis[0]; + struct hci_cis_params cis[]; } __packed; struct hci_rp_le_set_cig_params { __u8 status; __u8 cig_id; __u8 num_handles; - __le16 handle[0]; + __le16 handle[]; } __packed; #define HCI_OP_LE_CREATE_CIS 0x2064 @@ -1793,7 +1823,7 @@ struct hci_cis { struct hci_cp_le_create_cis { __u8 num_cis; - struct hci_cis cis[0]; + struct hci_cis cis[]; } __packed; #define HCI_OP_LE_REMOVE_CIG 0x2065 @@ -1937,7 +1967,7 @@ struct hci_comp_pkts_info { struct hci_ev_num_comp_pkts { __u8 num_hndl; - struct hci_comp_pkts_info handles[0]; + struct hci_comp_pkts_info handles[]; } __packed; #define HCI_EV_MODE_CHANGE 0x14 @@ -2170,7 +2200,7 @@ struct hci_comp_blocks_info { struct hci_ev_num_comp_blocks { __le16 num_blocks; __u8 num_hndl; - struct hci_comp_blocks_info handles[0]; + struct hci_comp_blocks_info handles[]; } __packed; #define HCI_EV_SYNC_TRAIN_COMPLETE 0x4F @@ -2226,7 +2256,7 @@ struct hci_ev_le_advertising_info { __u8 bdaddr_type; bdaddr_t bdaddr; __u8 length; - __u8 data[0]; + __u8 data[]; } __packed; #define HCI_EV_LE_CONN_UPDATE_COMPLETE 0x03 @@ -2302,7 +2332,7 @@ struct hci_ev_le_ext_adv_report { __u8 direct_addr_type; bdaddr_t direct_addr; __u8 length; - __u8 data[0]; + __u8 data[]; } __packed; #define HCI_EV_LE_ENHANCED_CONN_COMPLETE 0x0a @@ -2362,7 +2392,7 @@ struct hci_evt_le_cis_req { #define HCI_EV_STACK_INTERNAL 0xfd struct hci_ev_stack_internal { __u16 type; - __u8 data[0]; + __u8 data[]; } __packed; #define HCI_EV_SI_DEVICE 0x01 @@ -2409,7 +2439,7 @@ struct hci_sco_hdr { struct hci_iso_hdr { __le16 handle; __le16 dlen; - __u8 data[0]; + __u8 data[]; } __packed; /* ISO data packet status flags */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 89ecf0a80aa1..d4e28773d378 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -88,6 +88,31 @@ struct discovery_state { unsigned long scan_duration; }; +#define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ + +enum suspend_tasks { + SUSPEND_PAUSE_DISCOVERY, + SUSPEND_UNPAUSE_DISCOVERY, + + SUSPEND_PAUSE_ADVERTISING, + SUSPEND_UNPAUSE_ADVERTISING, + + SUSPEND_SCAN_DISABLE, + SUSPEND_SCAN_ENABLE, + SUSPEND_DISCONNECTING, + + SUSPEND_POWERING_DOWN, + + SUSPEND_PREPARE_NOTIFIER, + __SUSPEND_NUM_TASKS +}; + +enum suspended_state { + BT_RUNNING = 0, + BT_SUSPEND_DISCONNECT, + BT_SUSPEND_COMPLETE, +}; + struct hci_conn_hash { struct list_head list; unsigned int acl_num; @@ -260,6 +285,7 @@ struct hci_dev { __u8 stored_num_keys; __u8 io_capability; __s8 inq_tx_power; + __u8 err_data_reporting; __u16 page_scan_interval; __u16 page_scan_window; __u8 page_scan_type; @@ -389,11 +415,28 @@ struct hci_dev { void *smp_bredr_data; struct discovery_state discovery; + + int discovery_old_state; + bool discovery_paused; + int advertising_old_state; + bool advertising_paused; + + struct notifier_block suspend_notifier; + struct work_struct suspend_prepare; + enum suspended_state suspend_state_next; + enum suspended_state suspend_state; + bool scanning_paused; + bool suspended; + + wait_queue_head_t suspend_wait_q; + DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS); + struct hci_conn_hash conn_hash; struct list_head mgmt_pending; struct list_head blacklist; struct list_head whitelist; + struct list_head wakeable; struct list_head uuids; struct list_head link_keys; struct list_head long_term_keys; @@ -575,6 +618,7 @@ struct hci_conn_params { struct hci_conn *conn; bool explicit_connect; + bool wakeable; }; extern struct list_head hci_dev_list; @@ -1477,6 +1521,8 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); +u32 hci_conn_get_phy(struct hci_conn *conn); + /* ----- HCI Sockets ----- */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h index 8e9138acdae1..9352bb1bf34c 100644 --- a/include/net/bluetooth/hci_sock.h +++ b/include/net/bluetooth/hci_sock.h @@ -144,19 +144,19 @@ struct hci_dev_req { struct hci_dev_list_req { __u16 dev_num; - struct hci_dev_req dev_req[0]; /* hci_dev_req structures */ + struct hci_dev_req dev_req[]; /* hci_dev_req structures */ }; struct hci_conn_list_req { __u16 dev_id; __u16 conn_num; - struct hci_conn_info conn_info[0]; + struct hci_conn_info conn_info[]; }; struct hci_conn_info_req { bdaddr_t bdaddr; __u8 type; - struct hci_conn_info conn_info[0]; + struct hci_conn_info conn_info[]; }; struct hci_auth_info_req { diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 093aedebdf0c..537aaead259f 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -119,6 +119,10 @@ struct l2cap_conninfo { #define L2CAP_LE_CONN_REQ 0x14 #define L2CAP_LE_CONN_RSP 0x15 #define L2CAP_LE_CREDITS 0x16 +#define L2CAP_ECRED_CONN_REQ 0x17 +#define L2CAP_ECRED_CONN_RSP 0x18 +#define L2CAP_ECRED_RECONF_REQ 0x19 +#define L2CAP_ECRED_RECONF_RSP 0x1a /* L2CAP extended feature mask */ #define L2CAP_FEAT_FLOWCTL 0x00000001 @@ -290,6 +294,8 @@ struct l2cap_conn_rsp { #define L2CAP_CR_LE_ENCRYPTION 0x0008 #define L2CAP_CR_LE_INVALID_SCID 0x0009 #define L2CAP_CR_LE_SCID_IN_USE 0X000A +#define L2CAP_CR_LE_UNACCEPT_PARAMS 0X000B +#define L2CAP_CR_LE_INVALID_PARAMS 0X000C /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 @@ -299,14 +305,14 @@ struct l2cap_conn_rsp { struct l2cap_conf_req { __le16 dcid; __le16 flags; - __u8 data[0]; + __u8 data[]; } __packed; struct l2cap_conf_rsp { __le16 scid; __le16 flags; __le16 result; - __u8 data[0]; + __u8 data[]; } __packed; #define L2CAP_CONF_SUCCESS 0x0000 @@ -322,7 +328,7 @@ struct l2cap_conf_rsp { struct l2cap_conf_opt { __u8 type; __u8 len; - __u8 val[0]; + __u8 val[]; } __packed; #define L2CAP_CONF_OPT_SIZE 2 @@ -359,6 +365,7 @@ struct l2cap_conf_rfc { * ever be used in the BR/EDR configuration phase. */ #define L2CAP_MODE_LE_FLOWCTL 0x80 +#define L2CAP_MODE_EXT_FLOWCTL 0x81 struct l2cap_conf_efs { __u8 id; @@ -392,7 +399,7 @@ struct l2cap_info_req { struct l2cap_info_rsp { __le16 type; __le16 result; - __u8 data[0]; + __u8 data[]; } __packed; struct l2cap_create_chan_req { @@ -483,6 +490,39 @@ struct l2cap_le_credits { __le16 credits; } __packed; +#define L2CAP_ECRED_MIN_MTU 64 +#define L2CAP_ECRED_MIN_MPS 64 + +struct l2cap_ecred_conn_req { + __le16 psm; + __le16 mtu; + __le16 mps; + __le16 credits; + __le16 scid[0]; +} __packed; + +struct l2cap_ecred_conn_rsp { + __le16 mtu; + __le16 mps; + __le16 credits; + __le16 result; + __le16 dcid[0]; +}; + +struct l2cap_ecred_reconf_req { + __le16 mtu; + __le16 mps; + __le16 scid[0]; +} __packed; + +#define L2CAP_RECONF_SUCCESS 0x0000 +#define L2CAP_RECONF_INVALID_MTU 0x0001 +#define L2CAP_RECONF_INVALID_MPS 0x0002 + +struct l2cap_ecred_reconf_rsp { + __le16 result; +} __packed; + /* ----- L2CAP channels and connections ----- */ struct l2cap_seq_list { __u16 head; @@ -724,6 +764,7 @@ enum { FLAG_EFS_ENABLE, FLAG_DEFER_SETUP, FLAG_LE_CONN_REQ_SENT, + FLAG_ECRED_CONN_REQ_SENT, FLAG_PENDING_SECURITY, FLAG_HOLD_HCI_CONN, }; @@ -917,12 +958,14 @@ static inline long l2cap_chan_no_get_sndtimeo(struct l2cap_chan *chan) } extern bool disable_ertm; +extern bool enable_ecred; int l2cap_init_sockets(void); void l2cap_cleanup_sockets(void); bool l2cap_is_socket(struct socket *sock); void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan); +void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan); void __l2cap_connect_rsp_defer(struct l2cap_chan *chan); int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm); @@ -932,6 +975,7 @@ struct l2cap_chan *l2cap_chan_create(void); void l2cap_chan_close(struct l2cap_chan *chan, int reason); int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst, u8 dst_type); +int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu); int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len); void l2cap_chan_busy(struct l2cap_chan *chan, int busy); int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index a90666af05bd..f41cd87550dc 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -101,7 +101,8 @@ struct mgmt_rp_read_index_list { #define MGMT_SETTING_PRIVACY 0x00002000 #define MGMT_SETTING_CONFIGURATION 0x00004000 #define MGMT_SETTING_STATIC_ADDRESS 0x00008000 -#define MGMT_SETTING_PHY_CONFIGURATION 0x00010000 +#define MGMT_SETTING_PHY_CONFIGURATION 0x00010000 +#define MGMT_SETTING_WIDEBAND_SPEECH 0x00020000 #define MGMT_OP_READ_INFO 0x0004 #define MGMT_READ_INFO_SIZE 0 @@ -671,6 +672,8 @@ struct mgmt_cp_set_blocked_keys { } __packed; #define MGMT_OP_SET_BLOCKED_KEYS_SIZE 2 +#define MGMT_OP_SET_WIDEBAND_SPEECH 0x0047 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index da4acefe39c8..99d26879b02a 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -34,7 +34,6 @@ #define RFCOMM_DEFAULT_MTU 127 #define RFCOMM_DEFAULT_CREDITS 7 -#define RFCOMM_MAX_L2CAP_MTU 1013 #define RFCOMM_MAX_CREDITS 40 #define RFCOMM_SKB_HEAD_RESERVE 8 @@ -356,7 +355,7 @@ struct rfcomm_dev_info { struct rfcomm_dev_list_req { u16 dev_num; - struct rfcomm_dev_info dev_info[0]; + struct rfcomm_dev_info dev_info[]; }; int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f7c84c32ba39..c78bd4ff9e33 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7,7 +7,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #include <linux/netdevice.h> @@ -924,6 +924,7 @@ struct cfg80211_crypto_settings { __be16 control_port_ethertype; bool control_port_no_encrypt; bool control_port_over_nl80211; + bool control_port_no_preauth; struct key_params *wep_keys; int wep_tx_key; const u8 *psk; @@ -1054,6 +1055,7 @@ enum cfg80211_ap_settings_flags { * @flags: flags, as defined in enum cfg80211_ap_settings_flags * @he_obss_pd: OBSS Packet Detection settings * @he_bss_color: BSS Color settings + * @he_oper: HE operation IE (or %NULL if HE isn't enabled) */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1078,6 +1080,7 @@ struct cfg80211_ap_settings { const struct ieee80211_ht_cap *ht_cap; const struct ieee80211_vht_cap *vht_cap; const struct ieee80211_he_cap_elem *he_cap; + const struct ieee80211_he_operation *he_oper; bool ht_required, vht_required; bool twt_responder; u32 flags; @@ -2696,6 +2699,17 @@ enum wiphy_params_flags { * @cache_id: 2-octet cache identifier advertized by a FILS AP identifying the * scope of PMKSA. This is valid only if @ssid_len is non-zero (may be * %NULL). + * @pmk_lifetime: Maximum lifetime for PMKSA in seconds + * (dot11RSNAConfigPMKLifetime) or 0 if not specified. + * The configured PMKSA must not be used for PMKSA caching after + * expiration and any keys derived from this PMK become invalid on + * expiration, i.e., the current association must be dropped if the PMK + * used for it expires. + * @pmk_reauth_threshold: Threshold time for reauthentication (percentage of + * PMK lifetime, dot11RSNAConfigPMKReauthThreshold) or 0 if not specified. + * Drivers are expected to trigger a full authentication instead of using + * this PMKSA for caching when reassociating to a new BSS after this + * threshold to generate a new PMK before the current one expires. */ struct cfg80211_pmksa { const u8 *bssid; @@ -2705,6 +2719,8 @@ struct cfg80211_pmksa { const u8 *ssid; size_t ssid_len; const u8 *cache_id; + u32 pmk_lifetime; + u8 pmk_reauth_threshold; }; /** @@ -3269,6 +3285,12 @@ struct cfg80211_pmsr_result { * @ftmr_retries: number of retries for FTM request * @request_lci: request LCI information * @request_civicloc: request civic location information + * @trigger_based: use trigger based ranging for the measurement + * If neither @trigger_based nor @non_trigger_based is set, + * EDCA based ranging will be used. + * @non_trigger_based: use non trigger based ranging for the measurement + * If neither @trigger_based nor @non_trigger_based is set, + * EDCA based ranging will be used. * * See also nl80211 for the respective attribute documentation. */ @@ -3278,7 +3300,9 @@ struct cfg80211_pmsr_ftm_request_peer { u8 requested:1, asap:1, request_lci:1, - request_civicloc:1; + request_civicloc:1, + trigger_based:1, + non_trigger_based:1; u8 num_bursts_exp; u8 burst_duration; u8 ftms_per_burst; @@ -3404,7 +3428,7 @@ struct cfg80211_update_owe_info { * @set_default_key: set the default key on an interface * * @set_default_mgmt_key: set the default management frame key on an interface - + * * @set_default_beacon_key: set the default Beacon frame key on an interface * * @set_rekey_data: give the data necessary for GTK rekeying to the driver @@ -4434,6 +4458,8 @@ struct wiphy_iftype_ext_capab { * forbid using the value 15 to let the responder pick) * @ftm.max_ftms_per_burst: maximum FTMs per burst supported (set to 0 if * not limited) + * @ftm.trigger_based: trigger based ranging measurement is supported + * @ftm.non_trigger_based: non trigger based ranging measurement is supported */ struct cfg80211_pmsr_capabilities { unsigned int max_peers; @@ -4449,7 +4475,9 @@ struct cfg80211_pmsr_capabilities { asap:1, non_asap:1, request_lci:1, - request_civicloc:1; + request_civicloc:1, + trigger_based:1, + non_trigger_based:1; } ftm; }; diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 6f86073a5d7d..6ed07844eb24 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -214,7 +214,7 @@ struct wpan_phy { /* the network namespace this phy lives in currently */ possible_net_t _net; - char priv[0] __aligned(NETDEV_ALIGN); + char priv[] __aligned(NETDEV_ALIGN); }; static inline struct net *wpan_phy_net(struct wpan_phy *wpan_phy) diff --git a/include/net/devlink.h b/include/net/devlink.h index c9ca86b054bc..37230e23b5b0 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -490,6 +490,8 @@ enum devlink_param_generic_id { #define DEVLINK_INFO_VERSION_GENERIC_FW_PSID "fw.psid" /* RoCE FW version */ #define DEVLINK_INFO_VERSION_GENERIC_FW_ROCE "fw.roce" +/* Firmware bundle identifier */ +#define DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID "fw.bundle_id" struct devlink_region; struct devlink_info_req; @@ -551,7 +553,7 @@ struct devlink_trap_group { * @generic: Whether the trap is generic or not. * @id: Trap identifier. * @name: Trap name. - * @group: Immutable packet trap group attributes. + * @init_group_id: Initial group identifier. * @metadata_cap: Metadata types that can be provided by the trap. * * Describes immutable attributes of packet traps that drivers register with @@ -563,7 +565,7 @@ struct devlink_trap { bool generic; u16 id; const char *name; - struct devlink_trap_group group; + u16 init_group_id; u32 metadata_cap; }; @@ -692,18 +694,19 @@ enum devlink_trap_group_generic_id { #define DEVLINK_TRAP_GROUP_GENERIC_NAME_ACL_DROPS \ "acl_drops" -#define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group, _metadata_cap) \ +#define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group_id, \ + _metadata_cap) \ { \ .type = DEVLINK_TRAP_TYPE_##_type, \ .init_action = DEVLINK_TRAP_ACTION_##_init_action, \ .generic = true, \ .id = DEVLINK_TRAP_GENERIC_ID_##_id, \ .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \ - .group = _group, \ + .init_group_id = _group_id, \ .metadata_cap = _metadata_cap, \ } -#define DEVLINK_TRAP_DRIVER(_type, _init_action, _id, _name, _group, \ +#define DEVLINK_TRAP_DRIVER(_type, _init_action, _id, _name, _group_id, \ _metadata_cap) \ { \ .type = DEVLINK_TRAP_TYPE_##_type, \ @@ -711,7 +714,7 @@ enum devlink_trap_group_generic_id { .generic = false, \ .id = _id, \ .name = _name, \ - .group = _group, \ + .init_group_id = _group_id, \ .metadata_cap = _metadata_cap, \ } @@ -1055,6 +1058,12 @@ void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb, void *trap_ctx, struct devlink_port *in_devlink_port, const struct flow_action_cookie *fa_cookie); void *devlink_trap_ctx_priv(void *trap_ctx); +int devlink_trap_groups_register(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count); +void devlink_trap_groups_unregister(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count); #if IS_ENABLED(CONFIG_NET_DEVLINK) diff --git a/include/net/dst.h b/include/net/dst.h index 3448cf865ede..07adfacd8088 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -35,7 +35,6 @@ struct dst_entry { int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); unsigned short flags; -#define DST_HOST 0x0001 #define DST_NOXFRM 0x0002 #define DST_NOPOLICY 0x0004 #define DST_NOCOUNT 0x0008 diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 54e227e6b06a..a259050f84af 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -108,6 +108,7 @@ struct fib_rule_notifier_info { [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ [FRA_FWMARK] = { .type = NLA_U32 }, \ + [FRA_TUN_ID] = { .type = NLA_U64 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index d1b1e4aa310a..d06bf8d566ac 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -69,6 +69,10 @@ struct flow_match_enc_opts { struct flow_dissector_key_enc_opts *key, *mask; }; +struct flow_match_ct { + struct flow_dissector_key_ct *key, *mask; +}; + struct flow_rule; void flow_rule_match_meta(const struct flow_rule *rule, @@ -111,6 +115,8 @@ void flow_rule_match_enc_keyid(const struct flow_rule *rule, struct flow_match_enc_keyid *out); void flow_rule_match_enc_opts(const struct flow_rule *rule, struct flow_match_enc_opts *out); +void flow_rule_match_ct(const struct flow_rule *rule, + struct flow_match_ct *out); enum flow_action_id { FLOW_ACTION_ACCEPT = 0, @@ -131,11 +137,13 @@ enum flow_action_id { FLOW_ACTION_CSUM, FLOW_ACTION_MARK, FLOW_ACTION_PTYPE, + FLOW_ACTION_PRIORITY, FLOW_ACTION_WAKE, FLOW_ACTION_QUEUE, FLOW_ACTION_SAMPLE, FLOW_ACTION_POLICE, FLOW_ACTION_CT, + FLOW_ACTION_CT_METADATA, FLOW_ACTION_MPLS_PUSH, FLOW_ACTION_MPLS_POP, FLOW_ACTION_MPLS_MANGLE, @@ -155,20 +163,18 @@ enum flow_action_mangle_base { FLOW_ACT_MANGLE_HDR_TYPE_UDP, }; -enum flow_action_hw_stats_type_bit { - FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE_BIT, - FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT, +enum flow_action_hw_stats_bit { + FLOW_ACTION_HW_STATS_IMMEDIATE_BIT, + FLOW_ACTION_HW_STATS_DELAYED_BIT, }; -enum flow_action_hw_stats_type { - FLOW_ACTION_HW_STATS_TYPE_DISABLED = 0, - FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE = - BIT(FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE_BIT), - FLOW_ACTION_HW_STATS_TYPE_DELAYED = - BIT(FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT), - FLOW_ACTION_HW_STATS_TYPE_ANY = - FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE | - FLOW_ACTION_HW_STATS_TYPE_DELAYED, +enum flow_action_hw_stats { + FLOW_ACTION_HW_STATS_DISABLED = 0, + FLOW_ACTION_HW_STATS_IMMEDIATE = + BIT(FLOW_ACTION_HW_STATS_IMMEDIATE_BIT), + FLOW_ACTION_HW_STATS_DELAYED = BIT(FLOW_ACTION_HW_STATS_DELAYED_BIT), + FLOW_ACTION_HW_STATS_ANY = FLOW_ACTION_HW_STATS_IMMEDIATE | + FLOW_ACTION_HW_STATS_DELAYED, }; typedef void (*action_destr)(void *priv); @@ -185,7 +191,7 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie); struct flow_action_entry { enum flow_action_id id; - enum flow_action_hw_stats_type hw_stats_type; + enum flow_action_hw_stats hw_stats; action_destr destructor; void *destructor_priv; union { @@ -206,6 +212,7 @@ struct flow_action_entry { u32 csum_flags; /* FLOW_ACTION_CSUM */ u32 mark; /* FLOW_ACTION_MARK */ u16 ptype; /* FLOW_ACTION_PTYPE */ + u32 priority; /* FLOW_ACTION_PRIORITY */ struct { /* FLOW_ACTION_QUEUE */ u32 ctx; u32 index; @@ -224,7 +231,13 @@ struct flow_action_entry { struct { /* FLOW_ACTION_CT */ int action; u16 zone; + struct nf_flowtable *flow_table; } ct; + struct { + unsigned long cookie; + u32 mark; + u32 labels[4]; + } ct_metadata; struct { /* FLOW_ACTION_MPLS_PUSH */ u32 label; __be16 proto; @@ -272,22 +285,22 @@ static inline bool flow_offload_has_one_action(const struct flow_action *action) __act = &(__actions)->entries[++__i]) static inline bool -flow_action_mixed_hw_stats_types_check(const struct flow_action *action, - struct netlink_ext_ack *extack) +flow_action_mixed_hw_stats_check(const struct flow_action *action, + struct netlink_ext_ack *extack) { const struct flow_action_entry *action_entry; - u8 uninitialized_var(last_hw_stats_type); + u8 uninitialized_var(last_hw_stats); int i; if (flow_offload_has_one_action(action)) return true; flow_action_for_each(i, action_entry, action) { - if (i && action_entry->hw_stats_type != last_hw_stats_type) { + if (i && action_entry->hw_stats != last_hw_stats) { NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported"); return false; } - last_hw_stats_type = action_entry->hw_stats_type; + last_hw_stats = action_entry->hw_stats; } return true; } @@ -300,24 +313,24 @@ flow_action_first_entry_get(const struct flow_action *action) } static inline bool -__flow_action_hw_stats_types_check(const struct flow_action *action, - struct netlink_ext_ack *extack, - bool check_allow_bit, - enum flow_action_hw_stats_type_bit allow_bit) +__flow_action_hw_stats_check(const struct flow_action *action, + struct netlink_ext_ack *extack, + bool check_allow_bit, + enum flow_action_hw_stats_bit allow_bit) { const struct flow_action_entry *action_entry; if (!flow_action_has_entries(action)) return true; - if (!flow_action_mixed_hw_stats_types_check(action, extack)) + if (!flow_action_mixed_hw_stats_check(action, extack)) return false; action_entry = flow_action_first_entry_get(action); if (!check_allow_bit && - action_entry->hw_stats_type != FLOW_ACTION_HW_STATS_TYPE_ANY) { + action_entry->hw_stats != FLOW_ACTION_HW_STATS_ANY) { NL_SET_ERR_MSG_MOD(extack, "Driver supports only default HW stats type \"any\""); return false; } else if (check_allow_bit && - !(action_entry->hw_stats_type & BIT(allow_bit))) { + !(action_entry->hw_stats & BIT(allow_bit))) { NL_SET_ERR_MSG_MOD(extack, "Driver does not support selected HW stats type"); return false; } @@ -325,19 +338,18 @@ __flow_action_hw_stats_types_check(const struct flow_action *action, } static inline bool -flow_action_hw_stats_types_check(const struct flow_action *action, - struct netlink_ext_ack *extack, - enum flow_action_hw_stats_type_bit allow_bit) +flow_action_hw_stats_check(const struct flow_action *action, + struct netlink_ext_ack *extack, + enum flow_action_hw_stats_bit allow_bit) { - return __flow_action_hw_stats_types_check(action, extack, - true, allow_bit); + return __flow_action_hw_stats_check(action, extack, true, allow_bit); } static inline bool -flow_action_basic_hw_stats_types_check(const struct flow_action *action, - struct netlink_ext_ack *extack) +flow_action_basic_hw_stats_check(const struct flow_action *action, + struct netlink_ext_ack *extack) { - return __flow_action_hw_stats_types_check(action, extack, false, 0); + return __flow_action_hw_stats_check(action, extack, false, 0); } struct flow_rule { diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 895546058a20..a3f076befa4f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -335,4 +335,10 @@ static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) if (icsk->icsk_ack.pingpong < U8_MAX) icsk->icsk_ack.pingpong++; } + +static inline bool inet_csk_has_ulp(struct sock *sk) +{ + return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; +} + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6ec26e4d7f11..80262d2980f5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -190,11 +190,10 @@ struct fib6_info { u8 should_flush:1, dst_nocount:1, dst_nopolicy:1, - dst_host:1, fib6_destroying:1, offload:1, trap:1, - unused:1; + unused:2; struct rcu_head rcu; struct nexthop *nh; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5ef34a2ba3c7..b6b4de0e4b5e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1987,6 +1987,7 @@ struct ieee80211_sta_txpwr { * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID + * @txpwr: the station tx power configuration * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that * the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames */ @@ -3451,6 +3452,10 @@ enum ieee80211_reconfig_type { * in AP mode, this callback will not be called when the flag * %IEEE80211_HW_AP_LINK_PS is set. Must be atomic. * + * @sta_set_txpwr: Configure the station tx power. This callback set the tx + * power for the station. + * This callback can sleep. + * * @sta_state: Notifies low level driver about state transition of a * station (which can be the AP, a client, IBSS/WDS/mesh peer etc.) * This callback is mutually exclusive with @sta_add/@sta_remove. diff --git a/include/net/macsec.h b/include/net/macsec.h index 92e43db8b566..2e4780dbf5c6 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -11,18 +11,48 @@ #include <uapi/linux/if_link.h> #include <uapi/linux/if_macsec.h> -typedef u64 __bitwise sci_t; +#define MACSEC_DEFAULT_PN_LEN 4 +#define MACSEC_XPN_PN_LEN 8 +#define MACSEC_SALT_LEN 12 #define MACSEC_NUM_AN 4 /* 2 bits for the association number */ +typedef u64 __bitwise sci_t; +typedef u32 __bitwise ssci_t; + +typedef union salt { + struct { + u32 ssci; + u64 pn; + } __packed; + u8 bytes[MACSEC_SALT_LEN]; +} __packed salt_t; + +typedef union pn { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u32 lower; + u32 upper; +#elif defined(__BIG_ENDIAN_BITFIELD) + u32 upper; + u32 lower; +#else +#error "Please fix <asm/byteorder.h>" +#endif + }; + u64 full64; +} pn_t; + /** * struct macsec_key - SA key * @id: user-provided key identifier * @tfm: crypto struct, key storage + * @salt: salt used to generate IV in XPN cipher suites */ struct macsec_key { u8 id[MACSEC_KEYID_LEN]; struct crypto_aead *tfm; + salt_t salt; }; struct macsec_rx_sc_stats { @@ -64,12 +94,17 @@ struct macsec_tx_sc_stats { * @next_pn: packet number expected for the next packet * @lock: protects next_pn manipulations * @key: key structure + * @ssci: short secure channel identifier * @stats: per-SA stats */ struct macsec_rx_sa { struct macsec_key key; + ssci_t ssci; spinlock_t lock; - u32 next_pn; + union { + pn_t next_pn_halves; + u64 next_pn; + }; refcount_t refcnt; bool active; struct macsec_rx_sa_stats __percpu *stats; @@ -110,12 +145,17 @@ struct macsec_rx_sc { * @next_pn: packet number to use for the next packet * @lock: protects next_pn manipulations * @key: key structure + * @ssci: short secure channel identifier * @stats: per-SA stats */ struct macsec_tx_sa { struct macsec_key key; + ssci_t ssci; spinlock_t lock; - u32 next_pn; + union { + pn_t next_pn_halves; + u64 next_pn; + }; refcount_t refcnt; bool active; struct macsec_tx_sa_stats __percpu *stats; @@ -152,6 +192,7 @@ struct macsec_tx_sc { * @key_len: length of keys used by the cipher suite * @icv_len: length of ICV used by the cipher suite * @validate_frames: validation mode + * @xpn: enable XPN for this SecY * @operational: MAC_Operational flag * @protect_frames: enable protection for this SecY * @replay_protect: enable packet number checks on receive @@ -166,6 +207,7 @@ struct macsec_secy { u16 key_len; u16 icv_len; enum macsec_validation_type validate_frames; + bool xpn; bool operational; bool protect_frames; bool replay_protect; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 5ae5295aa46d..e1e588387103 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -45,7 +45,7 @@ enum nf_ct_ext_id { struct nf_ct_ext { u8 offset[NF_CT_EXT_NUM]; u8 len; - char data[0]; + char data[]; }; static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id) diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 6dd72396f534..659b0ea25b4d 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -14,7 +14,7 @@ struct nf_ct_timeout { __u16 l3num; const struct nf_conntrack_l4proto *l4proto; - char data[0]; + char data[]; }; struct ctnl_timeout { diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index e0f709d9d547..f523ea87b6ae 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -16,6 +16,35 @@ struct nf_flow_rule; struct flow_offload; enum flow_offload_tuple_dir; +struct nf_flow_key { + struct flow_dissector_key_meta meta; + struct flow_dissector_key_control control; + struct flow_dissector_key_control enc_control; + struct flow_dissector_key_basic basic; + union { + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_keyid enc_key_id; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; + struct flow_dissector_key_tcp tcp; + struct flow_dissector_key_ports tp; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct nf_flow_match { + struct flow_dissector dissector; + struct nf_flow_key key; + struct nf_flow_key mask; +}; + +struct nf_flow_rule { + struct nf_flow_match match; + struct flow_rule *rule; +}; + struct nf_flowtable_type { struct list_head list; int family; @@ -44,6 +73,7 @@ struct nf_flowtable { struct delayed_work gc_work; unsigned int flags; struct flow_block flow_block; + struct mutex flow_block_lock; /* Guards flow_block */ possible_net_t net; }; @@ -129,10 +159,18 @@ struct nf_flow_route { struct flow_offload *flow_offload_alloc(struct nf_conn *ct); void flow_offload_free(struct flow_offload *flow); +int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv); +void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv); + int flow_offload_route_init(struct flow_offload *flow, const struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); +void flow_offload_refresh(struct nf_flowtable *flow_table, + struct flow_offload *flow); + struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple); void nf_flow_table_cleanup(struct net_device *dev); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4170c033d461..5d80e09f8148 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -224,7 +224,7 @@ int nft_validate_register_store(const struct nft_ctx *ctx, */ struct nft_userdata { u8 len; - unsigned char data[0]; + unsigned char data[]; }; /** @@ -385,21 +385,14 @@ struct nft_set_ops { * struct nft_set_type - nf_tables set type * * @ops: set ops for this type - * @list: used internally - * @owner: module reference * @features: features supported by the implementation */ struct nft_set_type { const struct nft_set_ops ops; - struct list_head list; - struct module *owner; u32 features; }; #define to_set_type(o) container_of(o, struct nft_set_type, ops) -int nft_register_set(struct nft_set_type *type); -void nft_unregister_set(struct nft_set_type *type); - /** * struct nft_set - nf_tables set instance * @@ -572,7 +565,7 @@ struct nft_set_ext_tmpl { struct nft_set_ext { u8 genmask; u8 offset[NFT_SET_EXT_NUM]; - char data[0]; + char data[]; }; static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) @@ -673,6 +666,10 @@ static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_OBJREF); } +struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nlattr *attr); + void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *key_end, const u32 *data, @@ -849,8 +846,6 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) return (void *)expr->data; } -struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, - const struct nlattr *nla); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr); @@ -895,6 +890,18 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) return (void *)&rule->data[rule->dlen]; } +static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_expr *expr; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) { + expr = nft_set_ext_expr(ext); + expr->ops->eval(expr, regs, pkt); + } +} + /* * The last pointer isn't really necessary, but the compiler isn't able to * determine that the result of nft_expr_last() is always the same since it @@ -1253,9 +1260,6 @@ void nft_trace_notify(struct nft_traceinfo *info); #define MODULE_ALIAS_NFT_EXPR(name) \ MODULE_ALIAS("nft-expr-" name) -#define MODULE_ALIAS_NFT_SET() \ - MODULE_ALIAS("nft-set") - #define MODULE_ALIAS_NFT_OBJ(type) \ MODULE_ALIAS("nft-obj-" __stringify(type)) @@ -1385,7 +1389,7 @@ struct nft_trans { int msg_type; bool put_net; struct nft_ctx ctx; - char data[0]; + char data[]; }; struct nft_trans_rule { diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 29e7e1021267..78516de14d31 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -69,12 +69,13 @@ extern const struct nft_expr_ops nft_payload_fast_ops; extern struct static_key_false nft_counters_enabled; extern struct static_key_false nft_trace_enabled; -extern struct nft_set_type nft_set_rhash_type; -extern struct nft_set_type nft_set_hash_type; -extern struct nft_set_type nft_set_hash_fast_type; -extern struct nft_set_type nft_set_rbtree_type; -extern struct nft_set_type nft_set_bitmap_type; -extern struct nft_set_type nft_set_pipapo_type; +extern const struct nft_set_type nft_set_rhash_type; +extern const struct nft_set_type nft_set_hash_type; +extern const struct nft_set_type nft_set_hash_fast_type; +extern const struct nft_set_type nft_set_rbtree_type; +extern const struct nft_set_type nft_set_bitmap_type; +extern const struct nft_set_type nft_set_pipapo_type; +extern const struct nft_set_type nft_set_pipapo_avx2_type; struct nft_expr; struct nft_regs; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 08b98414d94e..154b8f01499b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -101,6 +101,7 @@ struct netns_ipv4 { int sysctl_ip_fwd_use_pmtu; int sysctl_ip_fwd_update_priority; int sysctl_ip_nonlocal_bind; + int sysctl_ip_autobind_reuse; /* Shall we try to damage output packets if routing dev changes? */ int sysctl_ip_dynaddr; int sysctl_ip_early_demux; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 341a66af8d59..1db8b27d4515 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -72,6 +72,10 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); +int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, + const struct tcf_proto *tp, struct tcf_result *res, + bool compat_mode); #else static inline bool tcf_block_shared(struct tcf_block *block) @@ -133,6 +137,15 @@ static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, { return TC_ACT_UNSPEC; } + +static inline int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, + const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + return TC_ACT_UNSPEC; +} + #endif static inline unsigned long @@ -727,6 +740,7 @@ struct tc_red_qopt_offload_params { u32 limit; bool is_ecn; bool is_harddrop; + bool is_nodrop; struct gnet_stats_queue *qstats; }; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 20d2c6419612..9092e697059e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -75,7 +75,15 @@ struct qdisc_watchdog { void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, clockid_t clockid); void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); -void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires); + +void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, + u64 delta_ns); + +static inline void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, + u64 expires) +{ + return qdisc_watchdog_schedule_range_ns(wd, expires, 0ULL); +} static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) diff --git a/include/net/red.h b/include/net/red.h index 9665582c4687..fc455445f4b2 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -179,6 +179,44 @@ static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog) return true; } +static inline int red_get_flags(unsigned char qopt_flags, + unsigned char historic_mask, + struct nlattr *flags_attr, + unsigned char supported_mask, + struct nla_bitfield32 *p_flags, + unsigned char *p_userbits, + struct netlink_ext_ack *extack) +{ + struct nla_bitfield32 flags; + + if (qopt_flags && flags_attr) { + NL_SET_ERR_MSG_MOD(extack, "flags should be passed either through qopt, or through a dedicated attribute"); + return -EINVAL; + } + + if (flags_attr) { + flags = nla_get_bitfield32(flags_attr); + } else { + flags.selector = historic_mask; + flags.value = qopt_flags & historic_mask; + } + + *p_flags = flags; + *p_userbits = qopt_flags & ~historic_mask; + return 0; +} + +static inline int red_validate_flags(unsigned char flags, + struct netlink_ext_ack *extack) +{ + if ((flags & TC_RED_NODROP) && !(flags & TC_RED_ECN)) { + NL_SET_ERR_MSG_MOD(extack, "nodrop mode is only meaningful with ECN"); + return -EINVAL; + } + + return 0; +} + static inline void red_set_parms(struct red_parms *p, u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog, u8 Scell_log, u8 *stab, u32 max_P) diff --git a/include/net/route.h b/include/net/route.h index 81750ae50833..ff021cab657e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -231,7 +231,7 @@ int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, - bool nopolicy, bool noxfrm, bool will_cache); + bool nopolicy, bool noxfrm); struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt); struct in_ifaddr; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7bfc45c5b602..bcdf98d21094 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -153,11 +153,6 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc) return !READ_ONCE(qdisc->q.qlen); } -static inline bool qdisc_hashed(struct Qdisc *qdisc) -{ - return hash_hashed(&qdisc->hash); -} - static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { @@ -634,7 +629,6 @@ void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); void qdisc_class_hash_destroy(struct Qdisc_class_hash *); int dev_qdisc_change_tx_queue_len(struct net_device *dev); -void dev_qdisc_set_real_num_tx_queues(struct net_device *dev); void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); @@ -1275,6 +1269,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, */ struct mini_Qdisc { struct tcf_proto *filter_list; + struct tcf_block *block; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct rcu_head rcu; @@ -1301,6 +1296,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, struct tcf_proto *tp_head); void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq); +void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, + struct tcf_block *block); static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res) { diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index cf3492e2a6a4..79654bcb9a29 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -27,6 +27,7 @@ struct tcf_ct_params { struct rcu_head rcu; struct tcf_ct_flow_table *ct_ft; + struct nf_flowtable *nf_ft; }; struct tcf_ct { @@ -50,11 +51,27 @@ static inline int tcf_ct_action(const struct tc_action *a) return to_ct_params(a)->ct_action; } +static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) +{ + return to_ct_params(a)->nf_ft; +} + #else static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; } static inline int tcf_ct_action(const struct tc_action *a) { return 0; } +static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) +{ + return NULL; +} #endif /* CONFIG_NF_CONNTRACK */ +#if IS_ENABLED(CONFIG_NET_ACT_CT) +void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie); +#else +static inline void +tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) { } +#endif + static inline bool is_tcf_ct(const struct tc_action *a) { #if defined(CONFIG_NET_CLS_ACT) && IS_ENABLED(CONFIG_NF_CONNTRACK) diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index b22a1f641f02..00bfee70609e 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -27,8 +27,8 @@ struct tcf_skbedit { }; #define to_skbedit(a) ((struct tcf_skbedit *)a) -/* Return true iff action is mark */ -static inline bool is_tcf_skbedit_mark(const struct tc_action *a) +/* Return true iff action is the one identified by FLAG. */ +static inline bool is_tcf_skbedit_with_flag(const struct tc_action *a, u32 flag) { #ifdef CONFIG_NET_CLS_ACT u32 flags; @@ -37,12 +37,18 @@ static inline bool is_tcf_skbedit_mark(const struct tc_action *a) rcu_read_lock(); flags = rcu_dereference(to_skbedit(a)->params)->flags; rcu_read_unlock(); - return flags == SKBEDIT_F_MARK; + return flags == flag; } #endif return false; } +/* Return true iff action is mark */ +static inline bool is_tcf_skbedit_mark(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_MARK); +} + static inline u32 tcf_skbedit_mark(const struct tc_action *a) { u32 mark; @@ -57,17 +63,7 @@ static inline u32 tcf_skbedit_mark(const struct tc_action *a) /* Return true iff action is ptype */ static inline bool is_tcf_skbedit_ptype(const struct tc_action *a) { -#ifdef CONFIG_NET_CLS_ACT - u32 flags; - - if (a->ops && a->ops->id == TCA_ID_SKBEDIT) { - rcu_read_lock(); - flags = rcu_dereference(to_skbedit(a)->params)->flags; - rcu_read_unlock(); - return flags == SKBEDIT_F_PTYPE; - } -#endif - return false; + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_PTYPE); } static inline u32 tcf_skbedit_ptype(const struct tc_action *a) @@ -81,4 +77,21 @@ static inline u32 tcf_skbedit_ptype(const struct tc_action *a) return ptype; } +/* Return true iff action is priority */ +static inline bool is_tcf_skbedit_priority(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_PRIORITY); +} + +static inline u32 tcf_skbedit_priority(const struct tc_action *a) +{ + u32 priority; + + rcu_read_lock(); + priority = rcu_dereference(to_skbedit(a)->params)->priority; + rcu_read_unlock(); + + return priority; +} + #endif /* __NET_TC_SKBEDIT_H */ diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 2b3df076e5b6..e1057b255f69 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -28,8 +28,10 @@ static inline bool is_tcf_tunnel_set(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT struct tcf_tunnel_key *t = to_tunnel_key(a); - struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + struct tcf_tunnel_key_params *params; + params = rcu_dereference_protected(t->params, + lockdep_is_held(&a->tcfa_lock)); if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET; #endif @@ -40,8 +42,10 @@ static inline bool is_tcf_tunnel_release(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT struct tcf_tunnel_key *t = to_tunnel_key(a); - struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + struct tcf_tunnel_key_params *params; + params = rcu_dereference_protected(t->params, + lockdep_is_held(&a->tcfa_lock)); if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE; #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index 07f947cc80e6..43fa07a36fa6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2195,21 +2195,23 @@ void tcp_update_ulp(struct sock *sk, struct proto *p, struct sk_msg; struct sk_psock; -int tcp_bpf_init(struct sock *sk); -void tcp_bpf_reinit(struct sock *sk); +#ifdef CONFIG_BPF_STREAM_PARSER +struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); +void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); +#else +static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) +{ +} +#endif /* CONFIG_BPF_STREAM_PARSER */ + +#ifdef CONFIG_NET_SOCK_MSG int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, int flags); int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); -#ifdef CONFIG_NET_SOCK_MSG -void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); -#else -static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) -{ -} -#endif +#endif /* CONFIG_NET_SOCK_MSG */ /* Call BPF_SOCK_OPS program that returns an int. If the return value * is < 0, then the BPF op failed (for example if the loaded BPF diff --git a/include/net/udp.h b/include/net/udp.h index e55d5f765807..a8fa6c0c6ded 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -503,4 +503,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, return segs; } +#ifdef CONFIG_BPF_STREAM_PARSER +struct sk_psock; +struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); +#endif /* BPF_STREAM_PARSER */ + #endif /* _UDP_H */ diff --git a/include/soc/mscc/ocelot_dev.h b/include/soc/mscc/ocelot_dev.h index 0a50d53bbd3f..7c08437061fc 100644 --- a/include/soc/mscc/ocelot_dev.h +++ b/include/soc/mscc/ocelot_dev.h @@ -74,7 +74,7 @@ #define DEV_MAC_TAGS_CFG_TAG_ID_M GENMASK(31, 16) #define DEV_MAC_TAGS_CFG_TAG_ID_X(x) (((x) & GENMASK(31, 16)) >> 16) #define DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA BIT(2) -#define DEV_MAC_TAGS_CFG_PB_ENA BIT(1) +#define DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA BIT(1) #define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0) #define DEV_MAC_ADV_CHK_CFG 0x2c diff --git a/include/sound/soc.h b/include/sound/soc.h index f0e4f36f83bf..8a2266676b2d 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1157,7 +1157,7 @@ struct snd_soc_pcm_runtime { ((i) < rtd->num_codecs) && ((dai) = rtd->codec_dais[i]); \ (i)++) #define for_each_rtd_codec_dai_rollback(rtd, i, dai) \ - for (; ((--i) >= 0) && ((dai) = rtd->codec_dais[i]);) + for (; (--(i) >= 0) && ((dai) = rtd->codec_dais[i]);) void snd_soc_close_delayed_work(struct snd_soc_pcm_runtime *rtd); diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index b04c29270973..1ce3be63add1 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -75,13 +75,17 @@ static inline void bpf_test_probe_##call(void) \ check_trace_callback_type_##call(__bpf_trace_##template); \ } \ typedef void (*btf_trace_##call)(void *__data, proto); \ -static struct bpf_raw_event_map __used \ - __attribute__((section("__bpf_raw_tp_map"))) \ -__bpf_trace_tp_map_##call = { \ - .tp = &__tracepoint_##call, \ - .bpf_func = (void *)(btf_trace_##call)__bpf_trace_##template, \ - .num_args = COUNT_ARGS(args), \ - .writable_size = size, \ +static union { \ + struct bpf_raw_event_map event; \ + btf_trace_##call handler; \ +} __bpf_trace_tp_map_##call __used \ +__attribute__((section("__bpf_raw_tp_map"))) = { \ + .event = { \ + .tp = &__tracepoint_##call, \ + .bpf_func = __bpf_trace_##template, \ + .num_args = COUNT_ARGS(args), \ + .writable_size = size, \ + }, \ }; #define FIRST(x, ...) x diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8e98ced0963b..5d01c5c7e598 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -210,6 +210,7 @@ enum bpf_attach_type { BPF_TRACE_RAW_TP, BPF_TRACE_FENTRY, BPF_TRACE_FEXIT, + BPF_MODIFY_RETURN, __MAX_BPF_ATTACH_TYPE }; @@ -325,44 +326,46 @@ enum bpf_attach_type { #define BPF_PSEUDO_CALL 1 /* flags for BPF_MAP_UPDATE_ELEM command */ -#define BPF_ANY 0 /* create new element or update existing */ -#define BPF_NOEXIST 1 /* create new element if it didn't exist */ -#define BPF_EXIST 2 /* update existing element */ -#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ +enum { + BPF_ANY = 0, /* create new element or update existing */ + BPF_NOEXIST = 1, /* create new element if it didn't exist */ + BPF_EXIST = 2, /* update existing element */ + BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ +}; /* flags for BPF_MAP_CREATE command */ -#define BPF_F_NO_PREALLOC (1U << 0) +enum { + BPF_F_NO_PREALLOC = (1U << 0), /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list * which can scale and perform better. * Note, the LRU nodes (including free nodes) cannot be moved * across different LRU lists. */ -#define BPF_F_NO_COMMON_LRU (1U << 1) + BPF_F_NO_COMMON_LRU = (1U << 1), /* Specify numa node during map creation */ -#define BPF_F_NUMA_NODE (1U << 2) - -#define BPF_OBJ_NAME_LEN 16U + BPF_F_NUMA_NODE = (1U << 2), /* Flags for accessing BPF object from syscall side. */ -#define BPF_F_RDONLY (1U << 3) -#define BPF_F_WRONLY (1U << 4) + BPF_F_RDONLY = (1U << 3), + BPF_F_WRONLY = (1U << 4), /* Flag for stack_map, store build_id+offset instead of pointer */ -#define BPF_F_STACK_BUILD_ID (1U << 5) + BPF_F_STACK_BUILD_ID = (1U << 5), /* Zero-initialize hash function seed. This should only be used for testing. */ -#define BPF_F_ZERO_SEED (1U << 6) + BPF_F_ZERO_SEED = (1U << 6), /* Flags for accessing BPF object from program side. */ -#define BPF_F_RDONLY_PROG (1U << 7) -#define BPF_F_WRONLY_PROG (1U << 8) + BPF_F_RDONLY_PROG = (1U << 7), + BPF_F_WRONLY_PROG = (1U << 8), /* Clone map from listener for newly accepted socket */ -#define BPF_F_CLONE (1U << 9) + BPF_F_CLONE = (1U << 9), /* Enable memory-mapping BPF map */ -#define BPF_F_MMAPABLE (1U << 10) + BPF_F_MMAPABLE = (1U << 10), +}; /* Flags for BPF_PROG_QUERY. */ @@ -391,6 +394,8 @@ struct bpf_stack_build_id { }; }; +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -2909,6 +2914,42 @@ union bpf_attr { * of sizeof(struct perf_branch_entry). * * **-ENOENT** if architecture does not support branch records. + * + * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * Description + * Returns 0 on success, values for *pid* and *tgid* as seen from the current + * *namespace* will be returned in *nsdata*. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if dev and inum supplied don't match dev_t and inode number + * with nsfs of current task, or if dev conversion to dev_t lost high bits. + * + * **-ENOENT** if pidns does not exists for the current task. + * + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct xdp_buff. + * + * This helper is similar to **bpf_perf_eventoutput**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3030,7 +3071,9 @@ union bpf_attr { FN(tcp_send_ack), \ FN(send_signal_thread), \ FN(jiffies64), \ - FN(read_branch_records), + FN(read_branch_records), \ + FN(get_ns_current_pid_tgid), \ + FN(xdp_output), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3045,72 +3088,100 @@ enum bpf_func_id { /* All flags used by eBPF helper functions, placed here. */ /* BPF_FUNC_skb_store_bytes flags. */ -#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) -#define BPF_F_INVALIDATE_HASH (1ULL << 1) +enum { + BPF_F_RECOMPUTE_CSUM = (1ULL << 0), + BPF_F_INVALIDATE_HASH = (1ULL << 1), +}; /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. * First 4 bits are for passing the header field size. */ -#define BPF_F_HDR_FIELD_MASK 0xfULL +enum { + BPF_F_HDR_FIELD_MASK = 0xfULL, +}; /* BPF_FUNC_l4_csum_replace flags. */ -#define BPF_F_PSEUDO_HDR (1ULL << 4) -#define BPF_F_MARK_MANGLED_0 (1ULL << 5) -#define BPF_F_MARK_ENFORCE (1ULL << 6) +enum { + BPF_F_PSEUDO_HDR = (1ULL << 4), + BPF_F_MARK_MANGLED_0 = (1ULL << 5), + BPF_F_MARK_ENFORCE = (1ULL << 6), +}; /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -#define BPF_F_INGRESS (1ULL << 0) +enum { + BPF_F_INGRESS = (1ULL << 0), +}; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ -#define BPF_F_TUNINFO_IPV6 (1ULL << 0) +enum { + BPF_F_TUNINFO_IPV6 = (1ULL << 0), +}; /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ -#define BPF_F_SKIP_FIELD_MASK 0xffULL -#define BPF_F_USER_STACK (1ULL << 8) +enum { + BPF_F_SKIP_FIELD_MASK = 0xffULL, + BPF_F_USER_STACK = (1ULL << 8), /* flags used by BPF_FUNC_get_stackid only. */ -#define BPF_F_FAST_STACK_CMP (1ULL << 9) -#define BPF_F_REUSE_STACKID (1ULL << 10) + BPF_F_FAST_STACK_CMP = (1ULL << 9), + BPF_F_REUSE_STACKID = (1ULL << 10), /* flags used by BPF_FUNC_get_stack only. */ -#define BPF_F_USER_BUILD_ID (1ULL << 11) + BPF_F_USER_BUILD_ID = (1ULL << 11), +}; /* BPF_FUNC_skb_set_tunnel_key flags. */ -#define BPF_F_ZERO_CSUM_TX (1ULL << 1) -#define BPF_F_DONT_FRAGMENT (1ULL << 2) -#define BPF_F_SEQ_NUMBER (1ULL << 3) +enum { + BPF_F_ZERO_CSUM_TX = (1ULL << 1), + BPF_F_DONT_FRAGMENT = (1ULL << 2), + BPF_F_SEQ_NUMBER = (1ULL << 3), +}; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. */ -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +enum { + BPF_F_INDEX_MASK = 0xffffffffULL, + BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, /* BPF_FUNC_perf_event_output for sk_buff input context. */ -#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + BPF_F_CTXLEN_MASK = (0xfffffULL << 32), +}; /* Current network namespace */ -#define BPF_F_CURRENT_NETNS (-1L) +enum { + BPF_F_CURRENT_NETNS = (-1L), +}; /* BPF_FUNC_skb_adjust_room flags. */ -#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) +enum { + BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), + BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), + BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), +}; -#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff -#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 +enum { + BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff, + BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56, +}; -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) -#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) -#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) #define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ BPF_ADJ_ROOM_ENCAP_L2_MASK) \ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) /* BPF_FUNC_sysctl_get_name flags. */ -#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) +enum { + BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), +}; /* BPF_FUNC_sk_storage_get flags */ -#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +enum { + BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), +}; /* BPF_FUNC_read_branch_records flags. */ -#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0) +enum { + BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0), +}; /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { @@ -3176,6 +3247,7 @@ struct __sk_buff { __u32 wire_len; __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); + __u32 gso_size; }; struct bpf_tunnel_key { @@ -3528,13 +3600,14 @@ struct bpf_sock_ops { }; /* Definitions for bpf_sock_ops_cb_flags */ -#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) -#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently - * supported cb flags - */ +enum { + BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0), + BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), + BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), + BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), +/* Mask of all currently supported cb flags */ + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, +}; /* List of known BPF sock_ops operators. * New entries can only be added at the end @@ -3613,8 +3686,10 @@ enum { BPF_TCP_MAX_STATES /* Leave at the end! */ }; -#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +enum { + TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ + TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ +}; struct bpf_perf_event_value { __u64 counter; @@ -3622,12 +3697,16 @@ struct bpf_perf_event_value { __u64 running; }; -#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) -#define BPF_DEVCG_ACC_READ (1ULL << 1) -#define BPF_DEVCG_ACC_WRITE (1ULL << 2) +enum { + BPF_DEVCG_ACC_MKNOD = (1ULL << 0), + BPF_DEVCG_ACC_READ = (1ULL << 1), + BPF_DEVCG_ACC_WRITE = (1ULL << 2), +}; -#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) -#define BPF_DEVCG_DEV_CHAR (1ULL << 1) +enum { + BPF_DEVCG_DEV_BLOCK = (1ULL << 0), + BPF_DEVCG_DEV_CHAR = (1ULL << 1), +}; struct bpf_cgroup_dev_ctx { /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ @@ -3643,8 +3722,10 @@ struct bpf_raw_tracepoint_args { /* DIRECT: Skip the FIB rules and go to FIB table associated with device * OUTPUT: Do lookup from egress perspective; default is ingress */ -#define BPF_FIB_LOOKUP_DIRECT (1U << 0) -#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) +enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), +}; enum { BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ @@ -3716,9 +3797,11 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; -#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) +enum { + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0), + BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1), + BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2), +}; struct bpf_flow_keys { __u16 nhoff; @@ -3784,4 +3867,8 @@ struct bpf_sockopt { __s32 retval; }; +struct bpf_pidns_info { + __u32 pid; + __u32 tgid; +}; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 2df8ceca1f9b..6622912c2342 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -272,9 +272,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 41 +#define DM_VERSION_MINOR 42 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2019-09-16)" +#define DM_VERSION_EXTRA "-ioctl (2020-02-27)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7e0b460f872c..c7c7a1a550af 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -24,6 +24,14 @@ enum { ETHTOOL_MSG_DEBUG_SET, ETHTOOL_MSG_WOL_GET, ETHTOOL_MSG_WOL_SET, + ETHTOOL_MSG_FEATURES_GET, + ETHTOOL_MSG_FEATURES_SET, + ETHTOOL_MSG_PRIVFLAGS_GET, + ETHTOOL_MSG_PRIVFLAGS_SET, + ETHTOOL_MSG_RINGS_GET, + ETHTOOL_MSG_RINGS_SET, + ETHTOOL_MSG_CHANNELS_GET, + ETHTOOL_MSG_CHANNELS_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -43,6 +51,15 @@ enum { ETHTOOL_MSG_DEBUG_NTF, ETHTOOL_MSG_WOL_GET_REPLY, ETHTOOL_MSG_WOL_NTF, + ETHTOOL_MSG_FEATURES_GET_REPLY, + ETHTOOL_MSG_FEATURES_SET_REPLY, + ETHTOOL_MSG_FEATURES_NTF, + ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, + ETHTOOL_MSG_PRIVFLAGS_NTF, + ETHTOOL_MSG_RINGS_GET_REPLY, + ETHTOOL_MSG_RINGS_NTF, + ETHTOOL_MSG_CHANNELS_GET_REPLY, + ETHTOOL_MSG_CHANNELS_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -228,6 +245,71 @@ enum { ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 }; +/* FEATURES */ + +enum { + ETHTOOL_A_FEATURES_UNSPEC, + ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_FEATURES_HW, /* bitset */ + ETHTOOL_A_FEATURES_WANTED, /* bitset */ + ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ + ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_FEATURES_CNT, + ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 +}; + +/* PRIVFLAGS */ + +enum { + ETHTOOL_A_PRIVFLAGS_UNSPEC, + ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_PRIVFLAGS_CNT, + ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 +}; + +/* RINGS */ + +enum { + ETHTOOL_A_RINGS_UNSPEC, + ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_RINGS_RX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ + ETHTOOL_A_RINGS_TX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ + ETHTOOL_A_RINGS_TX, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_RINGS_CNT, + ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) +}; + +/* CHANNELS */ + +enum { + ETHTOOL_A_CHANNELS_UNSPEC, + ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_CHANNELS_CNT, + ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 42f7ca38ad80..bfe621ea51b3 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -174,6 +174,16 @@ struct br_vlan_msg { __u32 ifindex; }; +enum { + BRIDGE_VLANDB_DUMP_UNSPEC, + BRIDGE_VLANDB_DUMP_FLAGS, + __BRIDGE_VLANDB_DUMP_MAX, +}; +#define BRIDGE_VLANDB_DUMP_MAX (__BRIDGE_VLANDB_DUMP_MAX - 1) + +/* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */ +#define BRIDGE_VLANDB_DUMPF_STATS (1 << 0) /* Include stats in the dump */ + /* Bridge vlan RTM attributes * [BRIDGE_VLANDB_ENTRY] = { * [BRIDGE_VLANDB_ENTRY_INFO] @@ -192,10 +202,46 @@ enum { BRIDGE_VLANDB_ENTRY_INFO, BRIDGE_VLANDB_ENTRY_RANGE, BRIDGE_VLANDB_ENTRY_STATE, + BRIDGE_VLANDB_ENTRY_TUNNEL_INFO, + BRIDGE_VLANDB_ENTRY_STATS, __BRIDGE_VLANDB_ENTRY_MAX, }; #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1) +/* [BRIDGE_VLANDB_ENTRY] = { + * [BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { + * [BRIDGE_VLANDB_TINFO_ID] + * ... + * } + * } + */ +enum { + BRIDGE_VLANDB_TINFO_UNSPEC, + BRIDGE_VLANDB_TINFO_ID, + BRIDGE_VLANDB_TINFO_CMD, + __BRIDGE_VLANDB_TINFO_MAX, +}; +#define BRIDGE_VLANDB_TINFO_MAX (__BRIDGE_VLANDB_TINFO_MAX - 1) + +/* [BRIDGE_VLANDB_ENTRY] = { + * [BRIDGE_VLANDB_ENTRY_STATS] = { + * [BRIDGE_VLANDB_STATS_RX_BYTES] + * ... + * } + * ... + * } + */ +enum { + BRIDGE_VLANDB_STATS_UNSPEC, + BRIDGE_VLANDB_STATS_RX_BYTES, + BRIDGE_VLANDB_STATS_RX_PACKETS, + BRIDGE_VLANDB_STATS_TX_BYTES, + BRIDGE_VLANDB_STATS_TX_PACKETS, + BRIDGE_VLANDB_STATS_PAD, + __BRIDGE_VLANDB_STATS_MAX, +}; +#define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1) + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 1d63c43c38cc..3af2aa069a36 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -22,9 +22,11 @@ #define MACSEC_KEYID_LEN 16 -/* cipher IDs as per IEEE802.1AEbn-2011 */ +/* cipher IDs as per IEEE802.1AE-2018 (Table 14-1) */ #define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL #define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL +#define MACSEC_CIPHER_ID_GCM_AES_XPN_128 0x0080C20001000003ULL +#define MACSEC_CIPHER_ID_GCM_AES_XPN_256 0x0080C20001000004ULL /* deprecated cipher ID for GCM-AES-128 */ #define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL @@ -88,11 +90,13 @@ enum macsec_sa_attrs { MACSEC_SA_ATTR_UNSPEC, MACSEC_SA_ATTR_AN, /* config/dump, u8 0..3 */ MACSEC_SA_ATTR_ACTIVE, /* config/dump, u8 0..1 */ - MACSEC_SA_ATTR_PN, /* config/dump, u32 */ + MACSEC_SA_ATTR_PN, /* config/dump, u32/u64 (u64 if XPN) */ MACSEC_SA_ATTR_KEY, /* config, data */ MACSEC_SA_ATTR_KEYID, /* config/dump, 128-bit */ MACSEC_SA_ATTR_STATS, /* dump, nested, macsec_sa_stats_attr */ MACSEC_SA_ATTR_PAD, + MACSEC_SA_ATTR_SSCI, /* config/dump, u32 - XPN only */ + MACSEC_SA_ATTR_SALT, /* config, 96-bit - XPN only */ __MACSEC_SA_ATTR_END, NUM_MACSEC_SA_ATTR = __MACSEC_SA_ATTR_END, MACSEC_SA_ATTR_MAX = __MACSEC_SA_ATTR_END - 1, diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 1521073b6348..8533bf07450f 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -74,6 +74,8 @@ enum { #define IPPROTO_UDPLITE IPPROTO_UDPLITE IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */ #define IPPROTO_MPLS IPPROTO_MPLS + IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ +#define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MPTCP = 262, /* Multipath TCP connection */ diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h index 0b9c3beda345..90f9b4e1ba27 100644 --- a/include/uapi/linux/mii.h +++ b/include/uapi/linux/mii.h @@ -134,11 +134,16 @@ /* MAC and PHY tx_config_Reg[15:0] for SGMII in-band auto-negotiation.*/ #define ADVERTISE_SGMII 0x0001 /* MAC can do SGMII */ #define LPA_SGMII 0x0001 /* PHY can do SGMII */ +#define LPA_SGMII_SPD_MASK 0x0c00 /* SGMII speed mask */ +#define LPA_SGMII_FULL_DUPLEX 0x1000 /* SGMII full duplex */ #define LPA_SGMII_DPX_SPD_MASK 0x1C00 /* SGMII duplex and speed bits */ +#define LPA_SGMII_10 0x0000 /* 10Mbps */ #define LPA_SGMII_10HALF 0x0000 /* Can do 10mbps half-duplex */ #define LPA_SGMII_10FULL 0x1000 /* Can do 10mbps full-duplex */ +#define LPA_SGMII_100 0x0400 /* 100Mbps */ #define LPA_SGMII_100HALF 0x0400 /* Can do 100mbps half-duplex */ #define LPA_SGMII_100FULL 0x1400 /* Can do 100mbps full-duplex */ +#define LPA_SGMII_1000 0x0800 /* 1000Mbps */ #define LPA_SGMII_1000HALF 0x0800 /* Can do 1000mbps half-duplex */ #define LPA_SGMII_1000FULL 0x1800 /* Can do 1000mbps full-duplex */ #define LPA_SGMII_LINK 0x8000 /* PHY link with copper-side partner */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 065218a20bb7..9c3d2d04d6a1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1770,6 +1770,7 @@ enum nft_tunnel_opts_attributes { NFTA_TUNNEL_KEY_OPTS_UNSPEC, NFTA_TUNNEL_KEY_OPTS_VXLAN, NFTA_TUNNEL_KEY_OPTS_ERSPAN, + NFTA_TUNNEL_KEY_OPTS_GENEVE, __NFTA_TUNNEL_KEY_OPTS_MAX }; #define NFTA_TUNNEL_KEY_OPTS_MAX (__NFTA_TUNNEL_KEY_OPTS_MAX - 1) @@ -1791,6 +1792,15 @@ enum nft_tunnel_opts_erspan_attributes { }; #define NFTA_TUNNEL_KEY_ERSPAN_MAX (__NFTA_TUNNEL_KEY_ERSPAN_MAX - 1) +enum nft_tunnel_opts_geneve_attributes { + NFTA_TUNNEL_KEY_GENEVE_UNSPEC, + NFTA_TUNNEL_KEY_GENEVE_CLASS, + NFTA_TUNNEL_KEY_GENEVE_TYPE, + NFTA_TUNNEL_KEY_GENEVE_DATA, + __NFTA_TUNNEL_KEY_GENEVE_MAX +}; +#define NFTA_TUNNEL_KEY_GENEVE_MAX (__NFTA_TUNNEL_KEY_GENEVE_MAX - 1) + enum nft_tunnel_flags { NFT_TUNNEL_F_ZERO_CSUM_TX = (1 << 0), NFT_TUNNEL_F_DONT_FRAGMENT = (1 << 1), diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h index 3c586a19baea..434e6506abaa 100644 --- a/include/uapi/linux/netfilter/xt_IDLETIMER.h +++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h @@ -1,4 +1,3 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * linux/include/linux/netfilter/xt_IDLETIMER.h * @@ -33,6 +32,7 @@ #include <linux/types.h> #define MAX_IDLETIMER_LABEL_SIZE 28 +#define XT_IDLETIMER_ALARM 0x01 struct idletimer_tg_info { __u32 timeout; @@ -43,4 +43,14 @@ struct idletimer_tg_info { struct idletimer_tg *timer __attribute__((aligned(8))); }; +struct idletimer_tg_info_v1 { + __u32 timeout; + + char label[MAX_IDLETIMER_LABEL_SIZE]; + + __u8 timer_type; + + /* for kernel module internal use only */ + struct idletimer_tg *timer __attribute__((aligned(8))); +}; #endif diff --git a/include/uapi/linux/netfilter_bridge/ebt_among.h b/include/uapi/linux/netfilter_bridge/ebt_among.h index 9acf757bc1f7..73b26a280c4f 100644 --- a/include/uapi/linux/netfilter_bridge/ebt_among.h +++ b/include/uapi/linux/netfilter_bridge/ebt_among.h @@ -40,7 +40,7 @@ struct ebt_mac_wormhash_tuple { struct ebt_mac_wormhash { int table[257]; int poolsize; - struct ebt_mac_wormhash_tuple pool[0]; + struct ebt_mac_wormhash_tuple pool[]; }; #define ebt_mac_wormhash_size(x) ((x) ? sizeof(struct ebt_mac_wormhash) \ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b002ef2060fa..2b691161830f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -11,7 +11,7 @@ * Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com> * Copyright 2008 Colin McCabe <colin@cozybit.com> * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -1632,7 +1632,8 @@ enum nl80211_commands { * flag is included, then control port frames are sent over NL80211 instead * using %CMD_CONTROL_PORT_FRAME. If control port routing over NL80211 is * to be used then userspace must also use the %NL80211_ATTR_SOCKET_OWNER - * flag. + * flag. When used with %NL80211_ATTR_CONTROL_PORT_NO_PREAUTH, pre-auth + * frames are not forwared over the control port. * * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. * We recommend using nested, driver-specific attributes within this. @@ -2442,6 +2443,33 @@ enum nl80211_commands { * on output (in wiphy attributes) it contains only the feature sub- * attributes. * + * @NL80211_ATTR_CONTROL_PORT_NO_PREAUTH: disable preauth frame rx on control + * port in order to forward/receive them as ordinary data frames. + * + * @NL80211_ATTR_PMK_LIFETIME: Maximum lifetime for PMKSA in seconds (u32, + * dot11RSNAConfigPMKReauthThreshold; 0 is not a valid value). + * An optional parameter configured through %NL80211_CMD_SET_PMKSA. + * Drivers that trigger roaming need to know the lifetime of the + * configured PMKSA for triggering the full vs. PMKSA caching based + * authentication. This timeout helps authentication methods like SAE, + * where PMK gets updated only by going through a full (new SAE) + * authentication instead of getting updated during an association for EAP + * authentication. No new full authentication within the PMK expiry shall + * result in a disassociation at the end of the lifetime. + * + * @NL80211_ATTR_PMK_REAUTH_THRESHOLD: Reauthentication threshold time, in + * terms of percentage of %NL80211_ATTR_PMK_LIFETIME + * (u8, dot11RSNAConfigPMKReauthThreshold, 1..100). This is an optional + * parameter configured through %NL80211_CMD_SET_PMKSA. Requests the + * driver to trigger a full authentication roam (without PMKSA caching) + * after the reauthentication threshold time, but before the PMK lifetime + * has expired. + * + * Authentication methods like SAE need to be able to generate a new PMKSA + * entry without having to force a disconnection after the PMK timeout. If + * no roaming occurs between the reauth threshold and PMK expiration, + * disassociation is still forced. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2912,6 +2940,11 @@ enum nl80211_attrs { NL80211_ATTR_TID_CONFIG, + NL80211_ATTR_CONTROL_PORT_NO_PREAUTH, + + NL80211_ATTR_PMK_LIFETIME, + NL80211_ATTR_PMK_REAUTH_THRESHOLD, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5642,6 +5675,15 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_BEACON_PROTECTION: The driver supports Beacon protection * and can receive key configuration for BIGTK using key indexes 6 and 7. * + * @NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH: The driver can disable the + * forwarding of preauth frames over the control port. They are then + * handled as ordinary data frames. + * + * @NL80211_EXT_FEATURE_PROTECTED_TWT: Driver supports protected TWT frames + * + * @NL80211_EXT_FEATURE_DEL_IBSS_STA: The driver supports removing stations + * in IBSS mode, essentially by dropping their state. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5690,6 +5732,9 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_VLAN_OFFLOAD, NL80211_EXT_FEATURE_AQL, NL80211_EXT_FEATURE_BEACON_PROTECTION, + NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH, + NL80211_EXT_FEATURE_PROTECTED_TWT, + NL80211_EXT_FEATURE_DEL_IBSS_STA, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -6312,12 +6357,14 @@ enum nl80211_ftm_responder_stats { * @NL80211_PREAMBLE_HT: HT preamble * @NL80211_PREAMBLE_VHT: VHT preamble * @NL80211_PREAMBLE_DMG: DMG preamble + * @NL80211_PREAMBLE_HE: HE preamble */ enum nl80211_preamble { NL80211_PREAMBLE_LEGACY, NL80211_PREAMBLE_HT, NL80211_PREAMBLE_VHT, NL80211_PREAMBLE_DMG, + NL80211_PREAMBLE_HE, }; /** @@ -6510,6 +6557,10 @@ enum nl80211_peer_measurement_attrs { * is valid) * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST: u32 attribute indicating * the maximum FTMs per burst (if not present anything is valid) + * @NL80211_PMSR_FTM_CAPA_ATTR_TRIGGER_BASED: flag attribute indicating if + * trigger based ranging measurement is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_NON_TRIGGER_BASED: flag attribute indicating + * if non trigger based ranging measurement is supported * * @NUM_NL80211_PMSR_FTM_CAPA_ATTR: internal * @NL80211_PMSR_FTM_CAPA_ATTR_MAX: highest attribute number @@ -6525,6 +6576,8 @@ enum nl80211_peer_measurement_ftm_capa { NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS, NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT, NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST, + NL80211_PMSR_FTM_CAPA_ATTR_TRIGGER_BASED, + NL80211_PMSR_FTM_CAPA_ATTR_NON_TRIGGER_BASED, /* keep last */ NUM_NL80211_PMSR_FTM_CAPA_ATTR, @@ -6554,6 +6607,20 @@ enum nl80211_peer_measurement_ftm_capa { * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI: request LCI data (flag) * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC: request civic location data * (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED: request trigger based ranging + * measurement (flag). + * This attribute and %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED are + * mutually exclusive. + * if neither %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED nor + * %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED is set, EDCA based + * ranging will be used. + * @NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED: request non trigger based + * ranging measurement (flag) + * This attribute and %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED are + * mutually exclusive. + * if neither %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED nor + * %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED is set, EDCA based + * ranging will be used. * * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number @@ -6570,6 +6637,8 @@ enum nl80211_peer_measurement_ftm_req { NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES, NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI, NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC, + NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED, + NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED, /* keep last */ NUM_NL80211_PMSR_FTM_REQ_ATTR, diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 81cc1a869588..6fcf7307e534 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -17,7 +17,7 @@ enum { TCA_ACT_PAD, TCA_ACT_COOKIE, TCA_ACT_FLAGS, - TCA_ACT_HW_STATS_TYPE, + TCA_ACT_HW_STATS, __TCA_ACT_MAX }; @@ -31,20 +31,19 @@ enum { * all supported bits set. * In case no bits are set, user is not interested in getting any HW statistics. */ -#define TCA_ACT_HW_STATS_TYPE_IMMEDIATE (1 << 0) /* Means that in dump, user - * gets the current HW stats - * state from the device - * queried at the dump time. - */ -#define TCA_ACT_HW_STATS_TYPE_DELAYED (1 << 1) /* Means that in dump, user gets - * HW stats that might be out - * of date for some time, maybe - * couple of seconds. This is - * the case when driver polls - * stats updates periodically - * or when it gets async stats update - * from the device. - */ +#define TCA_ACT_HW_STATS_IMMEDIATE (1 << 0) /* Means that in dump, user + * gets the current HW stats + * state from the device + * queried at the dump time. + */ +#define TCA_ACT_HW_STATS_DELAYED (1 << 1) /* Means that in dump, user gets + * HW stats that might be out of date + * for some time, maybe couple of + * seconds. This is the case when + * driver polls stats updates + * periodically or when it gets async + * stats update from the device. + */ #define TCA_ACT_MAX __TCA_ACT_MAX #define TCA_OLD_COMPAT (TCA_ACT_MAX+1) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index bbe791b24168..7307a29a103e 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -256,6 +256,7 @@ enum { TCA_RED_PARMS, TCA_RED_STAB, TCA_RED_MAX_P, + TCA_RED_FLAGS, /* bitfield32 */ __TCA_RED_MAX, }; @@ -268,12 +269,28 @@ struct tc_red_qopt { unsigned char Wlog; /* log(W) */ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ unsigned char Scell_log; /* cell size for idle damping */ + + /* This field can be used for flags that a RED-like qdisc has + * historically supported. E.g. when configuring RED, it can be used for + * ECN, HARDDROP and ADAPTATIVE. For SFQ it can be used for ECN, + * HARDDROP. Etc. Because this field has not been validated, and is + * copied back on dump, any bits besides those to which a given qdisc + * has assigned a historical meaning need to be considered for free use + * by userspace tools. + * + * Any further flags need to be passed differently, e.g. through an + * attribute (such as TCA_RED_FLAGS above). Such attribute should allow + * passing both recent and historic flags in one value. + */ unsigned char flags; #define TC_RED_ECN 1 #define TC_RED_HARDDROP 2 #define TC_RED_ADAPTATIVE 4 +#define TC_RED_NODROP 8 }; +#define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE) + struct tc_red_xstats { __u32 early; /* Early drops */ __u32 pdrop; /* Drops due to queue limits */ @@ -894,6 +911,8 @@ enum { TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ + TCA_FQ_TIMER_SLACK, /* timer slack */ + __TCA_FQ_MAX }; diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 585e07b27333..ecc27a17401a 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -46,5 +46,6 @@ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ +#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/xen/interface/io/tpmif.h b/include/xen/interface/io/tpmif.h index 28e7dcd75e82..f8aa8bac5196 100644 --- a/include/xen/interface/io/tpmif.h +++ b/include/xen/interface/io/tpmif.h @@ -46,7 +46,7 @@ struct vtpm_shared_page { uint8_t pad; uint8_t nr_extra_pages; /* extra pages for long packets; may be zero */ - uint32_t extra_pages[0]; /* grant IDs; length in nr_extra_pages */ + uint32_t extra_pages[]; /* grant IDs; length in nr_extra_pages */ }; #endif diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 89a889585ba0..850a43bd69d3 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -42,6 +42,7 @@ #include <linux/completion.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/semaphore.h> #include <xen/interface/xen.h> #include <xen/interface/grant_table.h> #include <xen/interface/io/xenbus.h> @@ -76,7 +77,7 @@ struct xenbus_device { enum xenbus_state state; struct completion down; struct work_struct work; - spinlock_t reclaim_lock; + struct semaphore reclaim_sem; }; static inline struct xenbus_device *to_xenbus_device(struct device *dev) diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index c498f0fffb40..ca5cc8cdb6eb 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -320,6 +320,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, struct bpf_struct_ops_value *uvalue, *kvalue; const struct btf_member *member; const struct btf_type *t = st_ops->type; + struct bpf_tramp_progs *tprogs = NULL; void *udata, *kdata; int prog_fd, err = 0; void *image; @@ -343,6 +344,10 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, if (uvalue->state || refcount_read(&uvalue->refcnt)) return -EINVAL; + tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); + if (!tprogs) + return -ENOMEM; + uvalue = (struct bpf_struct_ops_value *)st_map->uvalue; kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue; @@ -425,10 +430,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto reset_unlock; } + tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; + tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; err = arch_prepare_bpf_trampoline(image, st_map->image + PAGE_SIZE, &st_ops->func_models[i], 0, - &prog, 1, NULL, 0, NULL); + tprogs, NULL); if (err < 0) goto reset_unlock; @@ -469,6 +476,7 @@ reset_unlock: memset(uvalue, 0, map->value_size); memset(kvalue, 0, map->value_size); unlock: + kfree(tprogs); mutex_unlock(&st_map->lock); return err; } diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 787140095e58..50080add2ab9 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3710,13 +3710,26 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, nr_args--; } - if (prog->expected_attach_type == BPF_TRACE_FEXIT && - arg == nr_args) { - if (!t) - /* Default prog with 5 args. 6th arg is retval. */ - return true; - /* function return type */ - t = btf_type_by_id(btf, t->type); + if (arg == nr_args) { + if (prog->expected_attach_type == BPF_TRACE_FEXIT) { + if (!t) + return true; + t = btf_type_by_id(btf, t->type); + } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) { + /* For now the BPF_MODIFY_RETURN can only be attached to + * functions that return an int. + */ + if (!t) + return false; + + t = btf_type_skip_modifiers(btf, t->type, NULL); + if (!btf_type_is_int(t)) { + bpf_log(log, + "ret type %s not allowed for fmod_ret\n", + btf_kind_str[BTF_INFO_KIND(t->info)]); + return false; + } + } } else if (arg >= nr_args) { bpf_log(log, "func '%s' doesn't have %d-th argument\n", tname, arg + 1); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 973a20d49749..914f3463aa41 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -97,7 +97,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag fp->aux->prog = fp; fp->jit_requested = ebpf_jit_enabled(); - INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode); + INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode); return fp; } @@ -523,22 +523,22 @@ int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_harden __read_mostly; long bpf_jit_limit __read_mostly; -static __always_inline void -bpf_get_prog_addr_region(const struct bpf_prog *prog, - unsigned long *symbol_start, - unsigned long *symbol_end) +static void +bpf_prog_ksym_set_addr(struct bpf_prog *prog) { const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog); unsigned long addr = (unsigned long)hdr; WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog)); - *symbol_start = addr; - *symbol_end = addr + hdr->pages * PAGE_SIZE; + prog->aux->ksym.start = (unsigned long) prog->bpf_func; + prog->aux->ksym.end = addr + hdr->pages * PAGE_SIZE; } -void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +static void +bpf_prog_ksym_set_name(struct bpf_prog *prog) { + char *sym = prog->aux->ksym.name; const char *end = sym + KSYM_NAME_LEN; const struct btf_type *type; const char *func_name; @@ -572,36 +572,27 @@ void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) *sym = 0; } -static __always_inline unsigned long -bpf_get_prog_addr_start(struct latch_tree_node *n) +static unsigned long bpf_get_ksym_start(struct latch_tree_node *n) { - unsigned long symbol_start, symbol_end; - const struct bpf_prog_aux *aux; - - aux = container_of(n, struct bpf_prog_aux, ksym_tnode); - bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); - - return symbol_start; + return container_of(n, struct bpf_ksym, tnode)->start; } static __always_inline bool bpf_tree_less(struct latch_tree_node *a, struct latch_tree_node *b) { - return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b); + return bpf_get_ksym_start(a) < bpf_get_ksym_start(b); } static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n) { unsigned long val = (unsigned long)key; - unsigned long symbol_start, symbol_end; - const struct bpf_prog_aux *aux; + const struct bpf_ksym *ksym; - aux = container_of(n, struct bpf_prog_aux, ksym_tnode); - bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); + ksym = container_of(n, struct bpf_ksym, tnode); - if (val < symbol_start) + if (val < ksym->start) return -1; - if (val >= symbol_end) + if (val >= ksym->end) return 1; return 0; @@ -616,20 +607,29 @@ static DEFINE_SPINLOCK(bpf_lock); static LIST_HEAD(bpf_kallsyms); static struct latch_tree_root bpf_tree __cacheline_aligned; -static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux) +void bpf_ksym_add(struct bpf_ksym *ksym) { - WARN_ON_ONCE(!list_empty(&aux->ksym_lnode)); - list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms); - latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); + spin_lock_bh(&bpf_lock); + WARN_ON_ONCE(!list_empty(&ksym->lnode)); + list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms); + latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops); + spin_unlock_bh(&bpf_lock); } -static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux) +static void __bpf_ksym_del(struct bpf_ksym *ksym) { - if (list_empty(&aux->ksym_lnode)) + if (list_empty(&ksym->lnode)) return; - latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); - list_del_rcu(&aux->ksym_lnode); + latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops); + list_del_rcu(&ksym->lnode); +} + +void bpf_ksym_del(struct bpf_ksym *ksym) +{ + spin_lock_bh(&bpf_lock); + __bpf_ksym_del(ksym); + spin_unlock_bh(&bpf_lock); } static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) @@ -639,8 +639,8 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) { - return list_empty(&fp->aux->ksym_lnode) || - fp->aux->ksym_lnode.prev == LIST_POISON2; + return list_empty(&fp->aux->ksym.lnode) || + fp->aux->ksym.lnode.prev == LIST_POISON2; } void bpf_prog_kallsyms_add(struct bpf_prog *fp) @@ -649,9 +649,11 @@ void bpf_prog_kallsyms_add(struct bpf_prog *fp) !capable(CAP_SYS_ADMIN)) return; - spin_lock_bh(&bpf_lock); - bpf_prog_ksym_node_add(fp->aux); - spin_unlock_bh(&bpf_lock); + bpf_prog_ksym_set_addr(fp); + bpf_prog_ksym_set_name(fp); + fp->aux->ksym.prog = true; + + bpf_ksym_add(&fp->aux->ksym); } void bpf_prog_kallsyms_del(struct bpf_prog *fp) @@ -659,33 +661,30 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp) if (!bpf_prog_kallsyms_candidate(fp)) return; - spin_lock_bh(&bpf_lock); - bpf_prog_ksym_node_del(fp->aux); - spin_unlock_bh(&bpf_lock); + bpf_ksym_del(&fp->aux->ksym); } -static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr) +static struct bpf_ksym *bpf_ksym_find(unsigned long addr) { struct latch_tree_node *n; n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops); - return n ? - container_of(n, struct bpf_prog_aux, ksym_tnode)->prog : - NULL; + return n ? container_of(n, struct bpf_ksym, tnode) : NULL; } const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym) { - unsigned long symbol_start, symbol_end; - struct bpf_prog *prog; + struct bpf_ksym *ksym; char *ret = NULL; rcu_read_lock(); - prog = bpf_prog_kallsyms_find(addr); - if (prog) { - bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end); - bpf_get_prog_name(prog, sym); + ksym = bpf_ksym_find(addr); + if (ksym) { + unsigned long symbol_start = ksym->start; + unsigned long symbol_end = ksym->end; + + strncpy(sym, ksym->name, KSYM_NAME_LEN); ret = sym; if (size) @@ -703,19 +702,28 @@ bool is_bpf_text_address(unsigned long addr) bool ret; rcu_read_lock(); - ret = bpf_prog_kallsyms_find(addr) != NULL; + ret = bpf_ksym_find(addr) != NULL; rcu_read_unlock(); return ret; } +static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) +{ + struct bpf_ksym *ksym = bpf_ksym_find(addr); + + return ksym && ksym->prog ? + container_of(ksym, struct bpf_prog_aux, ksym)->prog : + NULL; +} + const struct exception_table_entry *search_bpf_extables(unsigned long addr) { const struct exception_table_entry *e = NULL; struct bpf_prog *prog; rcu_read_lock(); - prog = bpf_prog_kallsyms_find(addr); + prog = bpf_prog_ksym_find(addr); if (!prog) goto out; if (!prog->aux->num_exentries) @@ -730,7 +738,7 @@ out: int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym) { - struct bpf_prog_aux *aux; + struct bpf_ksym *ksym; unsigned int it = 0; int ret = -ERANGE; @@ -738,13 +746,13 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, return ret; rcu_read_lock(); - list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) { + list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) { if (it++ != symnum) continue; - bpf_get_prog_name(aux->prog, sym); + strncpy(sym, ksym->name, KSYM_NAME_LEN); - *value = (unsigned long)aux->prog->bpf_func; + *value = ksym->start; *type = BPF_SYM_ELF_TYPE; ret = 0; @@ -2149,6 +2157,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; const struct bpf_func_proto bpf_get_current_comm_proto __weak; const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; const struct bpf_func_proto bpf_get_local_storage_proto __weak; +const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak; const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index b3e5b214fed8..2444bd15cc2d 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) noff = 0; } else { old = d->image + d->image_off; - noff = d->image_off ^ (BPF_IMAGE_SIZE / 2); + noff = d->image_off ^ (PAGE_SIZE / 2); } new = d->num_progs ? d->image + noff : NULL; @@ -140,9 +140,10 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, mutex_lock(&d->mutex); if (!d->image) { - d->image = bpf_image_alloc(); + d->image = bpf_jit_alloc_exec_page(); if (!d->image) goto out; + bpf_image_ksym_add(d->image, &d->ksym); } prev_num_progs = d->num_progs; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index d8b7b110a1c5..01878db15eaf 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -12,6 +12,8 @@ #include <linux/filter.h> #include <linux/ctype.h> #include <linux/jiffies.h> +#include <linux/pid_namespace.h> +#include <linux/proc_ns.h> #include "../../lib/kstrtox.h" @@ -499,3 +501,46 @@ const struct bpf_func_proto bpf_strtoul_proto = { .arg4_type = ARG_PTR_TO_LONG, }; #endif + +BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, + struct bpf_pidns_info *, nsdata, u32, size) +{ + struct task_struct *task = current; + struct pid_namespace *pidns; + int err = -EINVAL; + + if (unlikely(size != sizeof(struct bpf_pidns_info))) + goto clear; + + if (unlikely((u64)(dev_t)dev != dev)) + goto clear; + + if (unlikely(!task)) + goto clear; + + pidns = task_active_pid_ns(task); + if (unlikely(!pidns)) { + err = -ENOENT; + goto clear; + } + + if (!ns_match(&pidns->ns, (dev_t)dev, ino)) + goto clear; + + nsdata->pid = task_pid_nr_ns(task, pidns); + nsdata->tgid = task_tgid_nr_ns(task, pidns); + return 0; +clear: + memset((void *)nsdata, 0, (size_t) size); + return err; +} + +const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { + .func = bpf_get_ns_current_pid_tgid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 5e40e7fccc21..95087d9f4ed3 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -25,6 +25,7 @@ enum bpf_type { BPF_TYPE_UNSPEC = 0, BPF_TYPE_PROG, BPF_TYPE_MAP, + BPF_TYPE_LINK, }; static void *bpf_any_get(void *raw, enum bpf_type type) @@ -36,6 +37,9 @@ static void *bpf_any_get(void *raw, enum bpf_type type) case BPF_TYPE_MAP: bpf_map_inc_with_uref(raw); break; + case BPF_TYPE_LINK: + bpf_link_inc(raw); + break; default: WARN_ON_ONCE(1); break; @@ -53,6 +57,9 @@ static void bpf_any_put(void *raw, enum bpf_type type) case BPF_TYPE_MAP: bpf_map_put_with_uref(raw); break; + case BPF_TYPE_LINK: + bpf_link_put(raw); + break; default: WARN_ON_ONCE(1); break; @@ -63,20 +70,32 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) { void *raw; - *type = BPF_TYPE_MAP; raw = bpf_map_get_with_uref(ufd); - if (IS_ERR(raw)) { + if (!IS_ERR(raw)) { + *type = BPF_TYPE_MAP; + return raw; + } + + raw = bpf_prog_get(ufd); + if (!IS_ERR(raw)) { *type = BPF_TYPE_PROG; - raw = bpf_prog_get(ufd); + return raw; } - return raw; + raw = bpf_link_get_from_fd(ufd); + if (!IS_ERR(raw)) { + *type = BPF_TYPE_LINK; + return raw; + } + + return ERR_PTR(-EINVAL); } static const struct inode_operations bpf_dir_iops; static const struct inode_operations bpf_prog_iops = { }; static const struct inode_operations bpf_map_iops = { }; +static const struct inode_operations bpf_link_iops = { }; static struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, @@ -114,6 +133,8 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) *type = BPF_TYPE_PROG; else if (inode->i_op == &bpf_map_iops) *type = BPF_TYPE_MAP; + else if (inode->i_op == &bpf_link_iops) + *type = BPF_TYPE_LINK; else return -EACCES; @@ -335,6 +356,12 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) &bpffs_map_fops : &bpffs_obj_fops); } +static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg) +{ + return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops, + &bpffs_obj_fops); +} + static struct dentry * bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) { @@ -411,6 +438,9 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw, case BPF_TYPE_MAP: ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw); break; + case BPF_TYPE_LINK: + ret = vfs_mkobj(dentry, mode, bpf_mklink, raw); + break; default: ret = -EPERM; } @@ -487,6 +517,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags) ret = bpf_prog_new_fd(raw); else if (type == BPF_TYPE_MAP) ret = bpf_map_new_fd(raw, f_flags); + else if (type == BPF_TYPE_LINK) + ret = bpf_link_new_fd(raw); else return -ENOENT; @@ -504,6 +536,8 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type if (inode->i_op == &bpf_map_iops) return ERR_PTR(-EINVAL); + if (inode->i_op == &bpf_link_iops) + return ERR_PTR(-EINVAL); if (inode->i_op != &bpf_prog_iops) return ERR_PTR(-EACCES); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c536c65256ad..85567a6ea5f9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2173,84 +2173,274 @@ static int bpf_obj_get(const union bpf_attr *attr) attr->file_flags); } -static int bpf_tracing_prog_release(struct inode *inode, struct file *filp) +struct bpf_link { + atomic64_t refcnt; + const struct bpf_link_ops *ops; + struct bpf_prog *prog; + struct work_struct work; +}; + +void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, + struct bpf_prog *prog) { - struct bpf_prog *prog = filp->private_data; + atomic64_set(&link->refcnt, 1); + link->ops = ops; + link->prog = prog; +} - WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); - bpf_prog_put(prog); +/* Clean up bpf_link and corresponding anon_inode file and FD. After + * anon_inode is created, bpf_link can't be just kfree()'d due to deferred + * anon_inode's release() call. This helper manages marking bpf_link as + * defunct, releases anon_inode file and puts reserved FD. + */ +static void bpf_link_cleanup(struct bpf_link *link, struct file *link_file, + int link_fd) +{ + link->prog = NULL; + fput(link_file); + put_unused_fd(link_fd); +} + +void bpf_link_inc(struct bpf_link *link) +{ + atomic64_inc(&link->refcnt); +} + +/* bpf_link_free is guaranteed to be called from process context */ +static void bpf_link_free(struct bpf_link *link) +{ + if (link->prog) { + /* detach BPF program, clean up used resources */ + link->ops->release(link); + bpf_prog_put(link->prog); + } + /* free bpf_link and its containing memory */ + link->ops->dealloc(link); +} + +static void bpf_link_put_deferred(struct work_struct *work) +{ + struct bpf_link *link = container_of(work, struct bpf_link, work); + + bpf_link_free(link); +} + +/* bpf_link_put can be called from atomic context, but ensures that resources + * are freed from process context + */ +void bpf_link_put(struct bpf_link *link) +{ + if (!atomic64_dec_and_test(&link->refcnt)) + return; + + if (in_atomic()) { + INIT_WORK(&link->work, bpf_link_put_deferred); + schedule_work(&link->work); + } else { + bpf_link_free(link); + } +} + +static int bpf_link_release(struct inode *inode, struct file *filp) +{ + struct bpf_link *link = filp->private_data; + + bpf_link_put(link); return 0; } -static const struct file_operations bpf_tracing_prog_fops = { - .release = bpf_tracing_prog_release, +#ifdef CONFIG_PROC_FS +static const struct bpf_link_ops bpf_raw_tp_lops; +static const struct bpf_link_ops bpf_tracing_link_lops; +static const struct bpf_link_ops bpf_xdp_link_lops; + +static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) +{ + const struct bpf_link *link = filp->private_data; + const struct bpf_prog *prog = link->prog; + char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; + const char *link_type; + + if (link->ops == &bpf_raw_tp_lops) + link_type = "raw_tracepoint"; + else if (link->ops == &bpf_tracing_link_lops) + link_type = "tracing"; + else + link_type = "unknown"; + + bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); + seq_printf(m, + "link_type:\t%s\n" + "prog_tag:\t%s\n" + "prog_id:\t%u\n", + link_type, + prog_tag, + prog->aux->id); +} +#endif + +const struct file_operations bpf_link_fops = { +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_link_show_fdinfo, +#endif + .release = bpf_link_release, .read = bpf_dummy_read, .write = bpf_dummy_write, }; +int bpf_link_new_fd(struct bpf_link *link) +{ + return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); +} + +/* Similar to bpf_link_new_fd, create anon_inode for given bpf_link, but + * instead of immediately installing fd in fdtable, just reserve it and + * return. Caller then need to either install it with fd_install(fd, file) or + * release with put_unused_fd(fd). + * This is useful for cases when bpf_link attachment/detachment are + * complicated and expensive operations and should be delayed until all the fd + * reservation and anon_inode creation succeeds. + */ +struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd) +{ + struct file *file; + int fd; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) + return ERR_PTR(fd); + + file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC); + if (IS_ERR(file)) { + put_unused_fd(fd); + return file; + } + + *reserved_fd = fd; + return file; +} + +struct bpf_link *bpf_link_get_from_fd(u32 ufd) +{ + struct fd f = fdget(ufd); + struct bpf_link *link; + + if (!f.file) + return ERR_PTR(-EBADF); + if (f.file->f_op != &bpf_link_fops) { + fdput(f); + return ERR_PTR(-EINVAL); + } + + link = f.file->private_data; + bpf_link_inc(link); + fdput(f); + + return link; +} + +struct bpf_tracing_link { + struct bpf_link link; +}; + +static void bpf_tracing_link_release(struct bpf_link *link) +{ + WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog)); +} + +static void bpf_tracing_link_dealloc(struct bpf_link *link) +{ + struct bpf_tracing_link *tr_link = + container_of(link, struct bpf_tracing_link, link); + + kfree(tr_link); +} + +static const struct bpf_link_ops bpf_tracing_link_lops = { + .release = bpf_tracing_link_release, + .dealloc = bpf_tracing_link_dealloc, +}; + static int bpf_tracing_prog_attach(struct bpf_prog *prog) { - int tr_fd, err; + struct bpf_tracing_link *link; + struct file *link_file; + int link_fd, err; if (prog->expected_attach_type != BPF_TRACE_FENTRY && prog->expected_attach_type != BPF_TRACE_FEXIT && + prog->expected_attach_type != BPF_MODIFY_RETURN && prog->type != BPF_PROG_TYPE_EXT) { err = -EINVAL; goto out_put_prog; } - err = bpf_trampoline_link_prog(prog); - if (err) + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + err = -ENOMEM; + goto out_put_prog; + } + bpf_link_init(&link->link, &bpf_tracing_link_lops, prog); + + link_file = bpf_link_new_file(&link->link, &link_fd); + if (IS_ERR(link_file)) { + kfree(link); + err = PTR_ERR(link_file); goto out_put_prog; + } - tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops, - prog, O_CLOEXEC); - if (tr_fd < 0) { - WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); - err = tr_fd; + err = bpf_trampoline_link_prog(prog); + if (err) { + bpf_link_cleanup(&link->link, link_file, link_fd); goto out_put_prog; } - return tr_fd; + + fd_install(link_fd, link_file); + return link_fd; out_put_prog: bpf_prog_put(prog); return err; } -struct bpf_raw_tracepoint { +struct bpf_raw_tp_link { + struct bpf_link link; struct bpf_raw_event_map *btp; - struct bpf_prog *prog; }; -static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp) +static void bpf_raw_tp_link_release(struct bpf_link *link) { - struct bpf_raw_tracepoint *raw_tp = filp->private_data; + struct bpf_raw_tp_link *raw_tp = + container_of(link, struct bpf_raw_tp_link, link); - if (raw_tp->prog) { - bpf_probe_unregister(raw_tp->btp, raw_tp->prog); - bpf_prog_put(raw_tp->prog); - } + bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog); bpf_put_raw_tracepoint(raw_tp->btp); +} + +static void bpf_raw_tp_link_dealloc(struct bpf_link *link) +{ + struct bpf_raw_tp_link *raw_tp = + container_of(link, struct bpf_raw_tp_link, link); + kfree(raw_tp); - return 0; } -static const struct file_operations bpf_raw_tp_fops = { - .release = bpf_raw_tracepoint_release, - .read = bpf_dummy_read, - .write = bpf_dummy_write, +static const struct bpf_link_ops bpf_raw_tp_lops = { + .release = bpf_raw_tp_link_release, + .dealloc = bpf_raw_tp_link_dealloc, }; #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd static int bpf_raw_tracepoint_open(const union bpf_attr *attr) { - struct bpf_raw_tracepoint *raw_tp; + struct bpf_raw_tp_link *link; struct bpf_raw_event_map *btp; + struct file *link_file; struct bpf_prog *prog; const char *tp_name; char buf[128]; - int tp_fd, err; + int link_fd, err; if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) return -EINVAL; @@ -2297,29 +2487,30 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) goto out_put_prog; } - raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER); - if (!raw_tp) { + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { err = -ENOMEM; goto out_put_btp; } - raw_tp->btp = btp; - raw_tp->prog = prog; + bpf_link_init(&link->link, &bpf_raw_tp_lops, prog); + link->btp = btp; - err = bpf_probe_register(raw_tp->btp, prog); - if (err) - goto out_free_tp; + link_file = bpf_link_new_file(&link->link, &link_fd); + if (IS_ERR(link_file)) { + kfree(link); + err = PTR_ERR(link_file); + goto out_put_btp; + } - tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp, - O_CLOEXEC); - if (tp_fd < 0) { - bpf_probe_unregister(raw_tp->btp, prog); - err = tp_fd; - goto out_free_tp; + err = bpf_probe_register(link->btp, prog); + if (err) { + bpf_link_cleanup(&link->link, link_file, link_fd); + goto out_put_btp; } - return tp_fd; -out_free_tp: - kfree(raw_tp); + fd_install(link_fd, link_file); + return link_fd; + out_put_btp: bpf_put_raw_tracepoint(btp); out_put_prog: @@ -3266,15 +3457,21 @@ static int bpf_task_fd_query(const union bpf_attr *attr, if (err) goto out; - if (file->f_op == &bpf_raw_tp_fops) { - struct bpf_raw_tracepoint *raw_tp = file->private_data; - struct bpf_raw_event_map *btp = raw_tp->btp; + if (file->f_op == &bpf_link_fops) { + struct bpf_link *link = file->private_data; - err = bpf_task_fd_query_copy(attr, uattr, - raw_tp->prog->aux->id, - BPF_FD_TYPE_RAW_TRACEPOINT, - btp->tp->name, 0, 0); - goto put_file; + if (link->ops == &bpf_raw_tp_lops) { + struct bpf_raw_tp_link *raw_tp = + container_of(link, struct bpf_raw_tp_link, link); + struct bpf_raw_event_map *btp = raw_tp->btp; + + err = bpf_task_fd_query_copy(attr, uattr, + raw_tp->link.prog->aux->id, + BPF_FD_TYPE_RAW_TRACEPOINT, + btp->tp->name, 0, 0); + goto put_file; + } + goto out_not_supp; } event = perf_get_event(file); @@ -3294,6 +3491,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, goto put_file; } +out_not_supp: err = -ENOTSUPP; put_file: fput(file); diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 704fa787fec0..f30bca2a4d01 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -5,6 +5,7 @@ #include <linux/filter.h> #include <linux/ftrace.h> #include <linux/rbtree_latch.h> +#include <linux/perf_event.h> /* dummy _ops. The verifier will operate on target program's ops. */ const struct bpf_verifier_ops bpf_extension_verifier_ops = { @@ -17,12 +18,11 @@ const struct bpf_prog_ops bpf_extension_prog_ops = { #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS) static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE]; -static struct latch_tree_root image_tree __cacheline_aligned; -/* serializes access to trampoline_table and image_tree */ +/* serializes access to trampoline_table */ static DEFINE_MUTEX(trampoline_mutex); -static void *bpf_jit_alloc_exec_page(void) +void *bpf_jit_alloc_exec_page(void) { void *image; @@ -38,62 +38,28 @@ static void *bpf_jit_alloc_exec_page(void) return image; } -static __always_inline bool image_tree_less(struct latch_tree_node *a, - struct latch_tree_node *b) +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym) { - struct bpf_image *ia = container_of(a, struct bpf_image, tnode); - struct bpf_image *ib = container_of(b, struct bpf_image, tnode); - - return ia < ib; -} - -static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n) -{ - void *image = container_of(n, struct bpf_image, tnode); - - if (addr < image) - return -1; - if (addr >= image + PAGE_SIZE) - return 1; - - return 0; -} - -static const struct latch_tree_ops image_tree_ops = { - .less = image_tree_less, - .comp = image_tree_comp, -}; - -static void *__bpf_image_alloc(bool lock) -{ - struct bpf_image *image; - - image = bpf_jit_alloc_exec_page(); - if (!image) - return NULL; - - if (lock) - mutex_lock(&trampoline_mutex); - latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops); - if (lock) - mutex_unlock(&trampoline_mutex); - return image->data; + ksym->start = (unsigned long) data; + ksym->end = ksym->start + PAGE_SIZE; + bpf_ksym_add(ksym); + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start, + PAGE_SIZE, false, ksym->name); } -void *bpf_image_alloc(void) +void bpf_image_ksym_del(struct bpf_ksym *ksym) { - return __bpf_image_alloc(true); + bpf_ksym_del(ksym); + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start, + PAGE_SIZE, true, ksym->name); } -bool is_bpf_image_address(unsigned long addr) +static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr) { - bool ret; - - rcu_read_lock(); - ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL; - rcu_read_unlock(); + struct bpf_ksym *ksym = &tr->ksym; - return ret; + snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key); + bpf_image_ksym_add(tr->image, ksym); } struct bpf_trampoline *bpf_trampoline_lookup(u64 key) @@ -116,7 +82,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key) goto out; /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */ - image = __bpf_image_alloc(false); + image = bpf_jit_alloc_exec_page(); if (!image) { kfree(tr); tr = NULL; @@ -131,6 +97,8 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key) for (i = 0; i < BPF_TRAMP_MAX; i++) INIT_HLIST_HEAD(&tr->progs_hlist[i]); tr->image = image; + INIT_LIST_HEAD_RCU(&tr->ksym.lnode); + bpf_trampoline_ksym_add(tr); out: mutex_unlock(&trampoline_mutex); return tr; @@ -190,40 +158,50 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) return ret; } -/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 - * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 - */ -#define BPF_MAX_TRAMP_PROGS 40 +static struct bpf_tramp_progs * +bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total) +{ + const struct bpf_prog_aux *aux; + struct bpf_tramp_progs *tprogs; + struct bpf_prog **progs; + int kind; + + *total = 0; + tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); + if (!tprogs) + return ERR_PTR(-ENOMEM); + + for (kind = 0; kind < BPF_TRAMP_MAX; kind++) { + tprogs[kind].nr_progs = tr->progs_cnt[kind]; + *total += tr->progs_cnt[kind]; + progs = tprogs[kind].progs; + + hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) + *progs++ = aux->prog; + } + return tprogs; +} static int bpf_trampoline_update(struct bpf_trampoline *tr) { - void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2; - void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2; - struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS]; - int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY]; - int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT]; - struct bpf_prog **progs, **fentry, **fexit; + void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2; + void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2; + struct bpf_tramp_progs *tprogs; u32 flags = BPF_TRAMP_F_RESTORE_REGS; - struct bpf_prog_aux *aux; - int err; + int err, total; + + tprogs = bpf_trampoline_get_progs(tr, &total); + if (IS_ERR(tprogs)) + return PTR_ERR(tprogs); - if (fentry_cnt + fexit_cnt == 0) { + if (total == 0) { err = unregister_fentry(tr, old_image); tr->selector = 0; goto out; } - /* populate fentry progs */ - fentry = progs = progs_to_run; - hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist) - *progs++ = aux->prog; - - /* populate fexit progs */ - fexit = progs; - hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist) - *progs++ = aux->prog; - - if (fexit_cnt) + if (tprogs[BPF_TRAMP_FEXIT].nr_progs || + tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; /* Though the second half of trampoline page is unused a task could be @@ -232,12 +210,11 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) * preempted task. Hence wait for tasks to voluntarily schedule or go * to userspace. */ + synchronize_rcu_tasks(); - err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2, - &tr->func.model, flags, - fentry, fentry_cnt, - fexit, fexit_cnt, + err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2, + &tr->func.model, flags, tprogs, tr->func.addr); if (err < 0) goto out; @@ -252,6 +229,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) goto out; tr->selector++; out: + kfree(tprogs); return err; } @@ -260,6 +238,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t) switch (t) { case BPF_TRACE_FENTRY: return BPF_TRAMP_FENTRY; + case BPF_MODIFY_RETURN: + return BPF_TRAMP_MODIFY_RETURN; case BPF_TRACE_FEXIT: return BPF_TRAMP_FEXIT; default: @@ -344,8 +324,6 @@ out: void bpf_trampoline_put(struct bpf_trampoline *tr) { - struct bpf_image *image; - if (!tr) return; mutex_lock(&trampoline_mutex); @@ -356,11 +334,10 @@ void bpf_trampoline_put(struct bpf_trampoline *tr) goto out; if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) goto out; - image = container_of(tr->image, struct bpf_image, data); - latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops); + bpf_image_ksym_del(&tr->ksym); /* wait for tasks to get out of trampoline before freeing it */ synchronize_rcu_tasks(); - bpf_jit_free_exec(image); + bpf_jit_free_exec(tr->image); hlist_del(&tr->hlist); kfree(tr); out: @@ -375,6 +352,7 @@ out: * call __bpf_prog_exit */ u64 notrace __bpf_prog_enter(void) + __acquires(RCU) { u64 start = 0; @@ -386,6 +364,7 @@ u64 notrace __bpf_prog_enter(void) } void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) + __releases(RCU) { struct bpf_prog_stats *stats; @@ -409,8 +388,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) int __weak arch_prepare_bpf_trampoline(void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_prog **fentry_progs, int fentry_cnt, - struct bpf_prog **fexit_progs, int fexit_cnt, + struct bpf_tramp_progs *tprogs, void *orig_call) { return -ENOTSUPP; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 289383edfc8c..745f3cfdf3b2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19,6 +19,7 @@ #include <linux/sort.h> #include <linux/perf_event.h> #include <linux/ctype.h> +#include <linux/error-injection.h> #include "disasm.h" @@ -3649,7 +3650,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (func_id != BPF_FUNC_perf_event_read && func_id != BPF_FUNC_perf_event_output && func_id != BPF_FUNC_skb_output && - func_id != BPF_FUNC_perf_event_read_value) + func_id != BPF_FUNC_perf_event_read_value && + func_id != BPF_FUNC_xdp_output) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -3739,6 +3741,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_perf_event_output: case BPF_FUNC_perf_event_read_value: case BPF_FUNC_skb_output: + case BPF_FUNC_xdp_output: if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) goto error; break; @@ -9800,6 +9803,26 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) return 0; } +#define SECURITY_PREFIX "security_" + +static int check_attach_modify_return(struct bpf_verifier_env *env) +{ + struct bpf_prog *prog = env->prog; + unsigned long addr = (unsigned long) prog->aux->trampoline->func.addr; + + /* This is expected to be cleaned up in the future with the KRSI effort + * introducing the LSM_HOOK macro for cleaning up lsm_hooks.h. + */ + if (within_error_injection_list(addr) || + !strncmp(SECURITY_PREFIX, prog->aux->attach_func_name, + sizeof(SECURITY_PREFIX) - 1)) + return 0; + + verbose(env, "fmod_ret attach_btf_id %u (%s) is not modifiable\n", + prog->aux->attach_btf_id, prog->aux->attach_func_name); + + return -EINVAL; +} static int check_attach_btf_id(struct bpf_verifier_env *env) { @@ -9950,6 +9973,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) if (!prog_extension) return -EINVAL; /* fallthrough */ + case BPF_MODIFY_RETURN: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: if (!btf_type_is_func(t)) { @@ -9999,6 +10023,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) } tr->func.addr = (void *)addr; prog->aux->trampoline = tr; + + if (prog->expected_attach_type == BPF_MODIFY_RETURN) + ret = check_attach_modify_return(env); out: mutex_unlock(&tr->mutex); if (ret) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index be1a1c83cdd1..f2d7cea86ffe 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -471,6 +471,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) */ p++; if (p >= end) { + (*pos)++; return NULL; } else { *pos = *p; @@ -782,7 +783,7 @@ void cgroup1_release_agent(struct work_struct *work) pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); - if (!pathbuf || !agentbuf) + if (!pathbuf || !agentbuf || !strlen(agentbuf)) goto out; spin_lock_irq(&css_set_lock); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 75f687301bbf..3dead0416b91 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3542,21 +3542,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v) static int cgroup_io_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_IO); } static int cgroup_memory_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_MEM); } static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_CPU); } @@ -4400,12 +4400,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) } } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks)); - if (!list_empty(&cset->tasks)) + if (!list_empty(&cset->tasks)) { it->task_pos = cset->tasks.next; - else if (!list_empty(&cset->mg_tasks)) + it->cur_tasks_head = &cset->tasks; + } else if (!list_empty(&cset->mg_tasks)) { it->task_pos = cset->mg_tasks.next; - else + it->cur_tasks_head = &cset->mg_tasks; + } else { it->task_pos = cset->dying_tasks.next; + it->cur_tasks_head = &cset->dying_tasks; + } it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; @@ -4463,10 +4467,14 @@ repeat: else it->task_pos = it->task_pos->next; - if (it->task_pos == it->tasks_head) + if (it->task_pos == it->tasks_head) { it->task_pos = it->mg_tasks_head->next; - if (it->task_pos == it->mg_tasks_head) + it->cur_tasks_head = it->mg_tasks_head; + } + if (it->task_pos == it->mg_tasks_head) { it->task_pos = it->dying_tasks_head->next; + it->cur_tasks_head = it->dying_tasks_head; + } if (it->task_pos == it->dying_tasks_head) css_task_iter_advance_css_set(it); } else { @@ -4485,11 +4493,12 @@ repeat: goto repeat; /* and dying leaders w/o live member threads */ - if (!atomic_read(&task->signal->live)) + if (it->cur_tasks_head == it->dying_tasks_head && + !atomic_read(&task->signal->live)) goto repeat; } else { /* skip all dying ones */ - if (task->flags & PF_EXITING) + if (it->cur_tasks_head == it->dying_tasks_head) goto repeat; } } @@ -4595,6 +4604,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) struct kernfs_open_file *of = s->private; struct css_task_iter *it = of->priv; + if (pos) + (*pos)++; + return css_task_iter_next(it); } @@ -4610,7 +4622,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, * from position 0, so we can simply keep iterating on !0 *pos. */ if (!it) { - if (WARN_ON_ONCE((*pos)++)) + if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); it = kzalloc(sizeof(*it), GFP_KERNEL); @@ -4618,10 +4630,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, return ERR_PTR(-ENOMEM); of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); - } else if (!(*pos)++) { + } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); - } + } else + return it->cur_task; return cgroup_procs_next(s, NULL, NULL); } @@ -6258,6 +6271,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) return; } + /* Don't associate the sock with unrelated interrupted task's cgroup. */ + if (in_interrupt()) + return; + rcu_read_lock(); while (true) { diff --git a/kernel/events/core.c b/kernel/events/core.c index bbdfac0182f4..9b89ef176247 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8255,23 +8255,22 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, enum perf_bpf_event_type type) { bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD; - char sym[KSYM_NAME_LEN]; int i; if (prog->aux->func_cnt == 0) { - bpf_get_prog_name(prog, sym); perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, (u64)(unsigned long)prog->bpf_func, - prog->jited_len, unregister, sym); + prog->jited_len, unregister, + prog->aux->ksym.name); } else { for (i = 0; i < prog->aux->func_cnt; i++) { struct bpf_prog *subprog = prog->aux->func[i]; - bpf_get_prog_name(subprog, sym); perf_event_ksymbol( PERF_RECORD_KSYMBOL_TYPE_BPF, (u64)(unsigned long)subprog->bpf_func, - subprog->jited_len, unregister, sym); + subprog->jited_len, unregister, + prog->aux->ksym.name); } } } diff --git a/kernel/exit.c b/kernel/exit.c index 2833ffb0c211..0b81b26a872a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -619,8 +619,8 @@ static void forget_original_parent(struct task_struct *father, reaper = find_new_reaper(father, reaper); list_for_each_entry(p, &father->children, sibling) { for_each_thread(p, t) { - t->real_parent = reaper; - BUG_ON((!t->ptrace) != (t->parent == father)); + RCU_INIT_POINTER(t->real_parent, reaper); + BUG_ON((!t->ptrace) != (rcu_access_pointer(t->parent) == father)); if (likely(!t->ptrace)) t->parent = t->real_parent; if (t->pdeath_signal) diff --git a/kernel/extable.c b/kernel/extable.c index a0024f27d3a1..7681f87e89dd 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -149,8 +149,6 @@ int kernel_text_address(unsigned long addr) goto out; if (is_bpf_text_address(addr)) goto out; - if (is_bpf_image_address(addr)) - goto out; ret = 0; out: if (no_rcu) diff --git a/kernel/fork.c b/kernel/fork.c index 60a1295f4384..86425305cd4a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1508,7 +1508,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) return 0; } sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); - rcu_assign_pointer(tsk->sighand, sig); + RCU_INIT_POINTER(tsk->sighand, sig); if (!sig) return -ENOMEM; diff --git a/kernel/pid.c b/kernel/pid.c index 0f4ecb57214c..647b4bb457b5 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -247,6 +247,16 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, tmp = tmp->parent; } + /* + * ENOMEM is not the most obvious choice especially for the case + * where the child subreaper has already exited and the pid + * namespace denies the creation of any new processes. But ENOMEM + * is what we have exposed to userspace for a long time and it is + * documented behavior for pid namespaces. So we can't easily + * change it even if there were an error code better suited. + */ + retval = -ENOMEM; + if (unlikely(is_child_reaper(pid))) { if (pid_ns_prepare_proc(ns)) goto out_free; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ddade80ad276..d82b7b88d616 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1681,7 +1681,7 @@ static unsigned long minimum_image_size(unsigned long saveable) * hibernation for allocations made while saving the image and for device * drivers, in case they need to allocate memory from their hibernation * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough - * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through + * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through * /sys/power/reserved_size, respectively). To make this happen, we compute the * total number of available page frames and allocate at least * diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3c8a379c357e..c1217bfe5e81 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8337,6 +8337,8 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd, sgs->group_capacity = group->sgc->capacity; + sgs->group_weight = group->group_weight; + sgs->group_type = group_classify(sd->imbalance_pct, group, sgs); /* diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 0735ae8545d8..ca39dc3230cb 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -335,6 +335,7 @@ static void put_probe_ref(void) static void blk_trace_cleanup(struct blk_trace *bt) { + synchronize_rcu(); blk_trace_free(bt); put_probe_ref(); } @@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, static int __blk_trace_startstop(struct request_queue *q, int start) { int ret; - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (bt == NULL) return -EINVAL; @@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) void blk_trace_shutdown(struct request_queue *q) { mutex_lock(&q->blk_trace_mutex); - - if (q->blk_trace) { + if (rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex))) { __blk_trace_startstop(q, 0); __blk_trace_remove(q); } @@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q) #ifdef CONFIG_BLK_CGROUP static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + /* We don't use the 'bt' value here except as an optimization... */ + bt = rcu_dereference_protected(q->blk_trace, 1); if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return 0; @@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) static void blk_add_trace_rq(struct request *rq, int error, unsigned int nr_bytes, u32 what, u64 cgid) { - struct blk_trace *bt = rq->q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(rq->q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } if (blk_rq_is_passthrough(rq)) what |= BLK_TC_ACT(BLK_TC_PC); @@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error, __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), rq->cmd_flags, what, error, 0, NULL, cgid); + rcu_read_unlock(); } static void blk_add_trace_rq_insert(void *ignore, @@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq, static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, u32 what, int error) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, what, error, 0, NULL, blk_trace_bio_get_cgid(q, bio)); + rcu_read_unlock(); } static void blk_add_trace_bio_bounce(void *ignore, @@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore, if (bio) blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); else { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, NULL, 0); + rcu_read_unlock(); } } @@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore, if (bio) blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); else { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, 0, 0, NULL, 0); + rcu_read_unlock(); } } static void blk_add_trace_plug(void *ignore, struct request_queue *q) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); + rcu_read_unlock(); } static void blk_add_trace_unplug(void *ignore, struct request_queue *q, unsigned int depth, bool explicit) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(depth); u32 what; @@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); } + rcu_read_unlock(); } static void blk_add_trace_split(void *ignore, struct request_queue *q, struct bio *bio, unsigned int pdu) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(pdu); @@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore, BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu), &rpdu, blk_trace_bio_get_cgid(q, bio)); } + rcu_read_unlock(); } /** @@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore, struct request_queue *q, struct bio *bio, dev_t dev, sector_t from) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct blk_io_trace_remap r; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(bio_dev(bio)); @@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore, __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status, sizeof(r), &r, blk_trace_bio_get_cgid(q, bio)); + rcu_read_unlock(); } /** @@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, sector_t from) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct blk_io_trace_remap r; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); @@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore, __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rq_data_dir(rq), 0, BLK_TA_REMAP, 0, sizeof(r), &r, blk_trace_request_get_cgid(q, rq)); + rcu_read_unlock(); } /** @@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, BLK_TA_DRV_DATA, 0, len, data, blk_trace_request_get_cgid(q, rq)); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blk_add_driver_data); @@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q) return -EINVAL; put_probe_ref(); + synchronize_rcu(); blk_trace_free(bt); return 0; } @@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct hd_struct *p = dev_to_part(dev); struct request_queue *q; struct block_device *bdev; + struct blk_trace *bt; ssize_t ret = -ENXIO; bdev = bdget(part_devt(p)); @@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, mutex_lock(&q->blk_trace_mutex); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (attr == &dev_attr_enable) { - ret = sprintf(buf, "%u\n", !!q->blk_trace); + ret = sprintf(buf, "%u\n", !!bt); goto out_unlock_bdev; } - if (q->blk_trace == NULL) + if (bt == NULL) ret = sprintf(buf, "disabled\n"); else if (attr == &dev_attr_act_mask) - ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); + ret = blk_trace_mask2str(buf, bt->act_mask); else if (attr == &dev_attr_pid) - ret = sprintf(buf, "%u\n", q->blk_trace->pid); + ret = sprintf(buf, "%u\n", bt->pid); else if (attr == &dev_attr_start_lba) - ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); + ret = sprintf(buf, "%llu\n", bt->start_lba); else if (attr == &dev_attr_end_lba) - ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); + ret = sprintf(buf, "%llu\n", bt->end_lba); out_unlock_bdev: mutex_unlock(&q->blk_trace_mutex); @@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct block_device *bdev; struct request_queue *q; struct hd_struct *p; + struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; @@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, mutex_lock(&q->blk_trace_mutex); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (attr == &dev_attr_enable) { - if (!!value == !!q->blk_trace) { + if (!!value == !!bt) { ret = 0; goto out_unlock_bdev; } @@ -1844,18 +1896,21 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, } ret = 0; - if (q->blk_trace == NULL) + if (bt == NULL) { ret = blk_trace_setup_queue(q, bdev); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); + } if (ret == 0) { if (attr == &dev_attr_act_mask) - q->blk_trace->act_mask = value; + bt->act_mask = value; else if (attr == &dev_attr_pid) - q->blk_trace->pid = value; + bt->pid = value; else if (attr == &dev_attr_start_lba) - q->blk_trace->start_lba = value; + bt->start_lba = value; else if (attr == &dev_attr_end_lba) - q->blk_trace->end_lba = value; + bt->end_lba = value; } out_unlock_bdev: diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 07764c761073..e619eedb5919 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -843,6 +843,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_send_signal_thread_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; + case BPF_FUNC_get_ns_current_pid_tgid: + return &bpf_get_ns_current_pid_tgid_proto; default: return NULL; } @@ -1143,6 +1145,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { }; extern const struct bpf_func_proto bpf_skb_output_proto; +extern const struct bpf_func_proto bpf_xdp_output_proto; BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags) @@ -1218,6 +1221,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #ifdef CONFIG_NET case BPF_FUNC_skb_output: return &bpf_skb_output_proto; + case BPF_FUNC_xdp_output: + return &bpf_xdp_output_proto; #endif default: return raw_tp_prog_func_proto(func_id, prog); @@ -1252,6 +1257,13 @@ static bool tracing_prog_is_valid_access(int off, int size, return btf_ctx_access(off, size, type, prog, info); } +int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { .get_func_proto = raw_tp_prog_func_proto, .is_valid_access = raw_tp_prog_is_valid_access, @@ -1266,6 +1278,7 @@ const struct bpf_verifier_ops tracing_verifier_ops = { }; const struct bpf_prog_ops tracing_prog_ops = { + .test_run = bpf_prog_test_run_tracing, }; static bool raw_tp_writable_prog_is_valid_access(int off, int size, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3f7ee102868a..fd81c7de77a7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1547,6 +1547,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end) rec = bsearch(&key, pg->records, pg->index, sizeof(struct dyn_ftrace), ftrace_cmp_recs); + if (rec) + break; } return rec; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 301db4406bc3..4e01c448b4b4 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1411,14 +1411,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, return; rcu_read_lock(); retry: - if (req_cpu == WORK_CPU_UNBOUND) - cpu = wq_select_unbound_cpu(raw_smp_processor_id()); - /* pwq which will be used unless @work is executing elsewhere */ - if (!(wq->flags & WQ_UNBOUND)) - pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); - else + if (wq->flags & WQ_UNBOUND) { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = wq_select_unbound_cpu(raw_smp_processor_id()); pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); + } else { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = raw_smp_processor_id(); + pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); + } /* * If @work was previously on a different pool, it might still be diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b08b199f9a11..24ad53b4dfc0 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3043,8 +3043,7 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, return; flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); - pmdval = *pvmw->pmd; - pmdp_invalidate(vma, address, pvmw->pmd); + pmdval = pmdp_invalidate(vma, address, pvmw->pmd); if (pmd_dirty(pmdval)) set_page_dirty(page); entry = make_migration_entry(page, pmd_write(pmdval)); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d09776cd6e10..2058b8da18db 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6682,19 +6682,9 @@ void mem_cgroup_sk_alloc(struct sock *sk) if (!mem_cgroup_sockets_enabled) return; - /* - * Socket cloning can throw us here with sk_memcg already - * filled. It won't however, necessarily happen from - * process context. So the test for root memcg given - * the current task's memcg won't help us in this case. - * - * Respecting the original socket's memcg is a better - * decision in this case. - */ - if (sk->sk_memcg) { - css_get(&sk->sk_memcg->css); + /* Do not associate the sock with unrelated interrupted task's memcg. */ + if (in_interrupt()) return; - } rcu_read_lock(); memcg = mem_cgroup_from_task(current); diff --git a/mm/memory.c b/mm/memory.c index 0bccc622e482..e8bfdf0d9d1d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2257,7 +2257,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src, bool ret; void *kaddr; void __user *uaddr; - bool force_mkyoung; + bool locked = false; struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; unsigned long addr = vmf->address; @@ -2282,11 +2282,11 @@ static inline bool cow_user_page(struct page *dst, struct page *src, * On architectures with software "accessed" bits, we would * take a double page fault, so mark it accessed here. */ - force_mkyoung = arch_faults_on_old_pte() && !pte_young(vmf->orig_pte); - if (force_mkyoung) { + if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { pte_t entry; vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); + locked = true; if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { /* * Other thread has already handled the fault @@ -2310,18 +2310,37 @@ static inline bool cow_user_page(struct page *dst, struct page *src, * zeroes. */ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { + if (locked) + goto warn; + + /* Re-validate under PTL if the page is still mapped */ + vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); + locked = true; + if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { + /* The PTE changed under us. Retry page fault. */ + ret = false; + goto pte_unlock; + } + /* - * Give a warn in case there can be some obscure - * use-case + * The same page can be mapped back since last copy attampt. + * Try to copy again under PTL. */ - WARN_ON_ONCE(1); - clear_page(kaddr); + if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { + /* + * Give a warn in case there can be some obscure + * use-case + */ +warn: + WARN_ON_ONCE(1); + clear_page(kaddr); + } } ret = true; pte_unlock: - if (force_mkyoung) + if (locked) pte_unmap_unlock(vmf->pte, vmf->ptl); kunmap_atomic(kaddr); flush_dcache_page(dst); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0a54ffac8c68..19389cdc16a5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -574,7 +574,13 @@ EXPORT_SYMBOL_GPL(restore_online_page_callback); void generic_online_page(struct page *page, unsigned int order) { - kernel_map_pages(page, 1 << order, 1); + /* + * Freeing the page with debug_pagealloc enabled will try to unmap it, + * so we should map it first. This is better than introducing a special + * case in page freeing fast path. + */ + if (debug_pagealloc_enabled_static()) + kernel_map_pages(page, 1 << order, 1); __free_pages_core(page, order); totalram_pages_add(1UL << order); #ifdef CONFIG_HIGHMEM diff --git a/mm/mprotect.c b/mm/mprotect.c index 7a8e84f86831..311c0dadf71c 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -161,6 +161,31 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, return pages; } +/* + * Used when setting automatic NUMA hinting protection where it is + * critical that a numa hinting PMD is not confused with a bad PMD. + */ +static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) +{ + pmd_t pmdval = pmd_read_atomic(pmd); + + /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + barrier(); +#endif + + if (pmd_none(pmdval)) + return 1; + if (pmd_trans_huge(pmdval)) + return 0; + if (unlikely(pmd_bad(pmdval))) { + pmd_clear_bad(pmd); + return 1; + } + + return 0; +} + static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -178,8 +203,17 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, unsigned long this_pages; next = pmd_addr_end(addr, end); - if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) - && pmd_none_or_clear_bad(pmd)) + + /* + * Automatic NUMA balancing walks the tables with mmap_sem + * held for read. It's possible a parallel update to occur + * between pmd_trans_huge() and a pmd_none_or_clear_bad() + * check leading to a false positive and clearing. + * Hence, it's necessary to atomically read the PMD value + * for all the checks. + */ + if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && + pmd_none_or_clear_bad_unless_trans_huge(pmd)) goto next; /* invoke the mmu notifier if the pmd is populated */ diff --git a/mm/z3fold.c b/mm/z3fold.c index 43754d8ebce8..42f31c4b53ad 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -41,7 +41,6 @@ #include <linux/workqueue.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/rwlock.h> #include <linux/zpool.h> #include <linux/magic.h> diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index f0209505e41a..a7c8dd7ae513 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -789,6 +789,10 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); + /* interface already disabled by batadv_iv_ogm_iface_disable */ + if (!*ogm_buff) + return; + /* the interface gets activated here to avoid race conditions between * the moment of activating the interface in * hardif_activate_interface() where the originator mac is set and diff --git a/net/bluetooth/a2mp.h b/net/bluetooth/a2mp.h index 0029d5119be6..2fd253a61a2a 100644 --- a/net/bluetooth/a2mp.h +++ b/net/bluetooth/a2mp.h @@ -36,14 +36,14 @@ struct a2mp_cmd { __u8 code; __u8 ident; __le16 len; - __u8 data[0]; + __u8 data[]; } __packed; /* A2MP command codes */ #define A2MP_COMMAND_REJ 0x01 struct a2mp_cmd_rej { __le16 reason; - __u8 data[0]; + __u8 data[]; } __packed; #define A2MP_DISCOVER_REQ 0x02 @@ -62,7 +62,7 @@ struct a2mp_cl { struct a2mp_discov_rsp { __le16 mtu; __le16 ext_feat; - struct a2mp_cl cl[0]; + struct a2mp_cl cl[]; } __packed; #define A2MP_CHANGE_NOTIFY 0x04 @@ -93,7 +93,7 @@ struct a2mp_amp_assoc_req { struct a2mp_amp_assoc_rsp { __u8 id; __u8 status; - __u8 amp_assoc[0]; + __u8 amp_assoc[]; } __packed; #define A2MP_CREATEPHYSLINK_REQ 0x0A @@ -101,7 +101,7 @@ struct a2mp_amp_assoc_rsp { struct a2mp_physlink_req { __u8 local_id; __u8 remote_id; - __u8 amp_assoc[0]; + __u8 amp_assoc[]; } __packed; #define A2MP_CREATEPHYSLINK_RSP 0x0B diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 24f18b133959..9680473ed7ef 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -74,14 +74,14 @@ struct bnep_setup_conn_req { __u8 type; __u8 ctrl; __u8 uuid_size; - __u8 service[0]; + __u8 service[]; } __packed; struct bnep_set_filter_req { __u8 type; __u8 ctrl; __be16 len; - __u8 list[0]; + __u8 list[]; } __packed; struct bnep_control_rsp { @@ -93,7 +93,7 @@ struct bnep_control_rsp { struct bnep_ext_hdr { __u8 type; __u8 len; - __u8 data[0]; + __u8 data[]; } __packed; /* BNEP ioctl defines */ diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 87691404d0c6..e245bc155cc2 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -467,6 +467,23 @@ static void hci_conn_auto_accept(struct work_struct *work) &conn->dst); } +static void le_disable_advertising(struct hci_dev *hdev) +{ + if (ext_adv_capable(hdev)) { + struct hci_cp_le_set_ext_adv_enable cp; + + cp.enable = 0x00; + cp.num_of_sets = 0x00; + + hci_send_cmd(hdev, HCI_OP_LE_SET_EXT_ADV_ENABLE, sizeof(cp), + &cp); + } else { + u8 enable = 0x00; + hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), + &enable); + } +} + static void le_conn_timeout(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, @@ -481,9 +498,8 @@ static void le_conn_timeout(struct work_struct *work) * (which doesn't have a timeout of its own). */ if (conn->role == HCI_ROLE_SLAVE) { - u8 enable = 0x00; - hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), - &enable); + /* Disable LE Advertising */ + le_disable_advertising(hdev); hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); return; } @@ -898,6 +914,16 @@ static void hci_req_directed_advertising(struct hci_request *req, cp.peer_addr_type = conn->dst_type; bacpy(&cp.peer_addr, &conn->dst); + /* As per Core Spec 5.2 Vol 2, PART E, Sec 7.8.53, for + * advertising_event_property LE_LEGACY_ADV_DIRECT_IND + * does not supports advertising data when the advertising set already + * contains some, the controller shall return erroc code 'Invalid + * HCI Command Parameters(0x12). + * So it is required to remove adv set for handle 0x00. since we use + * instance 0 for directed adv. + */ + hci_req_add(req, HCI_OP_LE_REMOVE_ADV_SET, sizeof(cp.handle), &cp.handle); + hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp); if (own_addr_type == ADDR_LE_DEV_RANDOM && @@ -1029,11 +1055,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, * anyway have to disable it in order to start directed * advertising. */ - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { - u8 enable = 0x00; - hci_req_add(&req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), - &enable); - } + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + __hci_req_disable_advertising(&req); /* If requested to connect as slave use directed advertising */ if (conn->role == HCI_ROLE_SLAVE) { @@ -1725,3 +1748,110 @@ struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle) return hchan; } + +u32 hci_conn_get_phy(struct hci_conn *conn) +{ + u32 phys = 0; + + hci_dev_lock(conn->hdev); + + /* BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 2, Part B page 471: + * Table 6.2: Packets defined for synchronous, asynchronous, and + * CSB logical transport types. + */ + switch (conn->type) { + case SCO_LINK: + /* SCO logical transport (1 Mb/s): + * HV1, HV2, HV3 and DV. + */ + phys |= BT_PHY_BR_1M_1SLOT; + + break; + + case ACL_LINK: + /* ACL logical transport (1 Mb/s) ptt=0: + * DH1, DM3, DH3, DM5 and DH5. + */ + phys |= BT_PHY_BR_1M_1SLOT; + + if (conn->pkt_type & (HCI_DM3 | HCI_DH3)) + phys |= BT_PHY_BR_1M_3SLOT; + + if (conn->pkt_type & (HCI_DM5 | HCI_DH5)) + phys |= BT_PHY_BR_1M_5SLOT; + + /* ACL logical transport (2 Mb/s) ptt=1: + * 2-DH1, 2-DH3 and 2-DH5. + */ + if (!(conn->pkt_type & HCI_2DH1)) + phys |= BT_PHY_EDR_2M_1SLOT; + + if (!(conn->pkt_type & HCI_2DH3)) + phys |= BT_PHY_EDR_2M_3SLOT; + + if (!(conn->pkt_type & HCI_2DH5)) + phys |= BT_PHY_EDR_2M_5SLOT; + + /* ACL logical transport (3 Mb/s) ptt=1: + * 3-DH1, 3-DH3 and 3-DH5. + */ + if (!(conn->pkt_type & HCI_3DH1)) + phys |= BT_PHY_EDR_3M_1SLOT; + + if (!(conn->pkt_type & HCI_3DH3)) + phys |= BT_PHY_EDR_3M_3SLOT; + + if (!(conn->pkt_type & HCI_3DH5)) + phys |= BT_PHY_EDR_3M_5SLOT; + + break; + + case ESCO_LINK: + /* eSCO logical transport (1 Mb/s): EV3, EV4 and EV5 */ + phys |= BT_PHY_BR_1M_1SLOT; + + if (!(conn->pkt_type & (ESCO_EV4 | ESCO_EV5))) + phys |= BT_PHY_BR_1M_3SLOT; + + /* eSCO logical transport (2 Mb/s): 2-EV3, 2-EV5 */ + if (!(conn->pkt_type & ESCO_2EV3)) + phys |= BT_PHY_EDR_2M_1SLOT; + + if (!(conn->pkt_type & ESCO_2EV5)) + phys |= BT_PHY_EDR_2M_3SLOT; + + /* eSCO logical transport (3 Mb/s): 3-EV3, 3-EV5 */ + if (!(conn->pkt_type & ESCO_3EV3)) + phys |= BT_PHY_EDR_3M_1SLOT; + + if (!(conn->pkt_type & ESCO_3EV5)) + phys |= BT_PHY_EDR_3M_3SLOT; + + break; + + case LE_LINK: + if (conn->le_tx_phy & HCI_LE_SET_PHY_1M) + phys |= BT_PHY_LE_1M_TX; + + if (conn->le_rx_phy & HCI_LE_SET_PHY_1M) + phys |= BT_PHY_LE_1M_RX; + + if (conn->le_tx_phy & HCI_LE_SET_PHY_2M) + phys |= BT_PHY_LE_2M_TX; + + if (conn->le_rx_phy & HCI_LE_SET_PHY_2M) + phys |= BT_PHY_LE_2M_RX; + + if (conn->le_tx_phy & HCI_LE_SET_PHY_CODED) + phys |= BT_PHY_LE_CODED_TX; + + if (conn->le_rx_phy & HCI_LE_SET_PHY_CODED) + phys |= BT_PHY_LE_CODED_RX; + + break; + } + + hci_dev_unlock(conn->hdev); + + return phys; +} diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index cbbc34a006d1..dbd2ad3a26ed 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -31,6 +31,8 @@ #include <linux/debugfs.h> #include <linux/crypto.h> #include <linux/property.h> +#include <linux/suspend.h> +#include <linux/wait.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> @@ -603,6 +605,9 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) if (hdev->commands[8] & 0x01) hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); + if (hdev->commands[18] & 0x04) + hci_req_add(req, HCI_OP_READ_DEF_ERR_DATA_REPORTING, 0, NULL); + /* Some older Broadcom based Bluetooth 1.2 controllers do not * support the Read Page Scan Type command. Check support for * this command in the bit mask of supported commands. @@ -838,6 +843,26 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt) sizeof(support), &support); } + /* Set erroneous data reporting if supported to the wideband speech + * setting value + */ + if (hdev->commands[18] & 0x08) { + bool enabled = hci_dev_test_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED); + + if (enabled != + (hdev->err_data_reporting == ERR_DATA_REPORTING_ENABLED)) { + struct hci_cp_write_def_err_data_reporting cp; + + cp.err_data_reporting = enabled ? + ERR_DATA_REPORTING_ENABLED : + ERR_DATA_REPORTING_DISABLED; + + hci_req_add(req, HCI_OP_WRITE_DEF_ERR_DATA_REPORTING, + sizeof(cp), &cp); + } + } + /* Set Suggested Default Data Length to maximum if supported */ if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { struct hci_cp_le_write_def_data_len cp; @@ -1764,6 +1789,9 @@ int hci_dev_do_close(struct hci_dev *hdev) clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); + if (test_and_clear_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks)) + wake_up(&hdev->suspend_wait_q); + /* After this point our queues are empty * and no tasks are scheduled. */ hdev->close(hdev); @@ -2285,7 +2313,7 @@ void hci_link_keys_clear(struct hci_dev *hdev) { struct link_key *key; - list_for_each_entry_rcu(key, &hdev->link_keys, list) { + list_for_each_entry(key, &hdev->link_keys, list) { list_del_rcu(&key->list); kfree_rcu(key, rcu); } @@ -2295,7 +2323,7 @@ void hci_smp_ltks_clear(struct hci_dev *hdev) { struct smp_ltk *k; - list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { + list_for_each_entry(k, &hdev->long_term_keys, list) { list_del_rcu(&k->list); kfree_rcu(k, rcu); } @@ -2305,7 +2333,7 @@ void hci_smp_irks_clear(struct hci_dev *hdev) { struct smp_irk *k; - list_for_each_entry_rcu(k, &hdev->identity_resolving_keys, list) { + list_for_each_entry(k, &hdev->identity_resolving_keys, list) { list_del_rcu(&k->list); kfree_rcu(k, rcu); } @@ -2315,7 +2343,7 @@ void hci_blocked_keys_clear(struct hci_dev *hdev) { struct blocked_key *b; - list_for_each_entry_rcu(b, &hdev->blocked_keys, list) { + list_for_each_entry(b, &hdev->blocked_keys, list) { list_del_rcu(&b->list); kfree_rcu(b, rcu); } @@ -2327,7 +2355,7 @@ bool hci_is_blocked_key(struct hci_dev *hdev, u8 type, u8 val[16]) struct blocked_key *b; rcu_read_lock(); - list_for_each_entry(b, &hdev->blocked_keys, list) { + list_for_each_entry_rcu(b, &hdev->blocked_keys, list) { if (b->type == type && !memcmp(b->val, val, sizeof(b->val))) { blocked = true; break; @@ -3241,6 +3269,93 @@ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, } } +static int hci_suspend_wait_event(struct hci_dev *hdev) +{ +#define WAKE_COND \ + (find_first_bit(hdev->suspend_tasks, __SUSPEND_NUM_TASKS) == \ + __SUSPEND_NUM_TASKS) + + int i; + int ret = wait_event_timeout(hdev->suspend_wait_q, + WAKE_COND, SUSPEND_NOTIFIER_TIMEOUT); + + if (ret == 0) { + bt_dev_dbg(hdev, "Timed out waiting for suspend"); + for (i = 0; i < __SUSPEND_NUM_TASKS; ++i) { + if (test_bit(i, hdev->suspend_tasks)) + bt_dev_dbg(hdev, "Bit %d is set", i); + clear_bit(i, hdev->suspend_tasks); + } + + ret = -ETIMEDOUT; + } else { + ret = 0; + } + + return ret; +} + +static void hci_prepare_suspend(struct work_struct *work) +{ + struct hci_dev *hdev = + container_of(work, struct hci_dev, suspend_prepare); + + hci_dev_lock(hdev); + hci_req_prepare_suspend(hdev, hdev->suspend_state_next); + hci_dev_unlock(hdev); +} + +static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct hci_dev *hdev = + container_of(nb, struct hci_dev, suspend_notifier); + int ret = 0; + + /* If powering down, wait for completion. */ + if (mgmt_powering_down(hdev)) { + set_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks); + ret = hci_suspend_wait_event(hdev); + if (ret) + goto done; + } + + /* Suspend notifier should only act on events when powered. */ + if (!hdev_is_powered(hdev)) + goto done; + + if (action == PM_SUSPEND_PREPARE) { + /* Suspend consists of two actions: + * - First, disconnect everything and make the controller not + * connectable (disabling scanning) + * - Second, program event filter/whitelist and enable scan + */ + hdev->suspend_state_next = BT_SUSPEND_DISCONNECT; + set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + queue_work(hdev->req_workqueue, &hdev->suspend_prepare); + ret = hci_suspend_wait_event(hdev); + + /* If the disconnect portion failed, don't attempt to complete + * by configuring the whitelist. The suspend notifier will + * follow a cancelled suspend with a PM_POST_SUSPEND + * notification. + */ + if (!ret) { + hdev->suspend_state_next = BT_SUSPEND_COMPLETE; + set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + queue_work(hdev->req_workqueue, &hdev->suspend_prepare); + ret = hci_suspend_wait_event(hdev); + } + } else if (action == PM_POST_SUSPEND) { + hdev->suspend_state_next = BT_RUNNING; + set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + queue_work(hdev->req_workqueue, &hdev->suspend_prepare); + ret = hci_suspend_wait_event(hdev); + } + +done: + return ret ? notifier_from_errno(-EBUSY) : NOTIFY_STOP; +} /* Alloc HCI device */ struct hci_dev *hci_alloc_dev(void) { @@ -3299,6 +3414,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_LIST_HEAD(&hdev->mgmt_pending); INIT_LIST_HEAD(&hdev->blacklist); INIT_LIST_HEAD(&hdev->whitelist); + INIT_LIST_HEAD(&hdev->wakeable); INIT_LIST_HEAD(&hdev->uuids); INIT_LIST_HEAD(&hdev->link_keys); INIT_LIST_HEAD(&hdev->long_term_keys); @@ -3318,6 +3434,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); INIT_WORK(&hdev->error_reset, hci_error_reset); + INIT_WORK(&hdev->suspend_prepare, hci_prepare_suspend); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); @@ -3326,6 +3443,7 @@ struct hci_dev *hci_alloc_dev(void) skb_queue_head_init(&hdev->raw_q); init_waitqueue_head(&hdev->req_wait_q); + init_waitqueue_head(&hdev->suspend_wait_q); INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout); @@ -3437,6 +3555,11 @@ int hci_register_dev(struct hci_dev *hdev) hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); + hdev->suspend_notifier.notifier_call = hci_suspend_notifier; + error = register_pm_notifier(&hdev->suspend_notifier); + if (error) + goto err_wqueue; + queue_work(hdev->req_workqueue, &hdev->power_on); return id; @@ -3470,6 +3593,8 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_dev_do_close(hdev); + unregister_pm_notifier(&hdev->suspend_notifier); + if (!test_bit(HCI_INIT, &hdev->flags) && !hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG)) { @@ -4387,13 +4512,16 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_sco_hdr *hdr = (void *) skb->data; struct hci_conn *conn; - __u16 handle; + __u16 handle, flags; skb_pull(skb, HCI_SCO_HDR_SIZE); handle = __le16_to_cpu(hdr->handle); + flags = hci_flags(handle); + handle = hci_handle(handle); - BT_DBG("%s len %d handle 0x%4.4x", hdev->name, skb->len, handle); + BT_DBG("%s len %d handle 0x%4.4x flags 0x%4.4x", hdev->name, skb->len, + handle, flags); hdev->stat.sco_rx++; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6ddc4a74a5e4..20408d386268 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -901,6 +901,37 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, hdev->inq_tx_power = rp->tx_power; } +static void hci_cc_read_def_err_data_reporting(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_def_err_data_reporting *rp = (void *)skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->err_data_reporting = rp->err_data_reporting; +} + +static void hci_cc_write_def_err_data_reporting(struct hci_dev *hdev, + struct sk_buff *skb) +{ + __u8 status = *((__u8 *)skb->data); + struct hci_cp_write_def_err_data_reporting *cp; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_WRITE_DEF_ERR_DATA_REPORTING); + if (!cp) + return; + + hdev->err_data_reporting = cp->err_data_reporting; +} + static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_pin_code_reply *rp = (void *) skb->data; @@ -2202,10 +2233,22 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); - if (conn) + if (conn) { + u8 type = conn->type; + mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); + /* If the disconnection failed for any reason, the upper layer + * does not retry to disconnect in current implementation. + * Hence, we need to do some basic cleanup here and re-enable + * advertising if necessary. + */ + hci_conn_del(conn); + if (type == LE_LINK) + hci_req_reenable_advertising(hdev); + } + hci_dev_unlock(hdev); } @@ -2474,6 +2517,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_conn_complete *ev = (void *) skb->data; + struct inquiry_entry *ie; struct hci_conn *conn; BT_DBG("%s", hdev->name); @@ -2482,6 +2526,21 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { + /* Connection may not exist if auto-connected. Check the inquiry + * cache to see if we've already discovered this bdaddr before. + * If found and link is an ACL type, create a connection class + * automatically. + */ + ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); + if (ie && ev->link_type == ACL_LINK) { + conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr, + HCI_ROLE_SLAVE); + if (!conn) { + bt_dev_err(hdev, "no memory for new conn"); + goto unlock; + } + } + if (ev->link_type != SCO_LINK) goto unlock; @@ -2743,6 +2802,14 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_disconn_cfm(conn, ev->reason); hci_conn_del(conn); + /* The suspend notifier is waiting for all devices to disconnect so + * clear the bit from pending tasks and inform the wait queue. + */ + if (list_empty(&hdev->conn_hash.list) && + test_and_clear_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks)) { + wake_up(&hdev->suspend_wait_q); + } + /* Re-enable advertising if necessary, since it might * have been disabled by the connection. From the * HCI_LE_Set_Advertise_Enable command description in @@ -3302,6 +3369,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_cc_read_inq_rsp_tx_power(hdev, skb); break; + case HCI_OP_READ_DEF_ERR_DATA_REPORTING: + hci_cc_read_def_err_data_reporting(hdev, skb); + break; + + case HCI_OP_WRITE_DEF_ERR_DATA_REPORTING: + hci_cc_write_def_err_data_reporting(hdev, skb); + break; + case HCI_OP_PIN_CODE_REPLY: hci_cc_pin_code_reply(hdev, skb); break; @@ -4557,6 +4632,16 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, goto confirm; } + /* If there already exists link key in local host, leave the + * decision to user space since the remote device could be + * legitimate or malicious. + */ + if (hci_find_link_key(hdev, &ev->bdaddr)) { + bt_dev_dbg(hdev, "Local host already has link key"); + confirm_hint = 1; + goto confirm; + } + BT_DBG("Auto-accept of user confirmation with %ums delay", hdev->auto_accept_delay); @@ -5858,6 +5943,11 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) u8 status = 0, event = hdr->evt, req_evt = 0; u16 opcode = HCI_OP_NOP; + if (!event) { + bt_dev_warn(hdev, "Received unexpected HCI Event 00000000"); + goto done; + } + if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->hci.req_event == event) { struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data; opcode = __le16_to_cpu(cmd_hdr->opcode); @@ -6069,6 +6159,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) req_complete_skb(hdev, status, opcode, orig_skb); } +done: kfree_skb(orig_skb); kfree_skb(skb); hdev->stat.evt_rx++; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 2a1b64dbf76e..649e1e5ed446 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -34,6 +34,9 @@ #define HCI_REQ_PEND 1 #define HCI_REQ_CANCELED 2 +#define LE_SUSPEND_SCAN_WINDOW 0x0012 +#define LE_SUSPEND_SCAN_INTERVAL 0x0060 + void hci_req_init(struct hci_request *req, struct hci_dev *hdev) { skb_queue_head_init(&req->cmd_q); @@ -654,6 +657,11 @@ void hci_req_add_le_scan_disable(struct hci_request *req) { struct hci_dev *hdev = req->hdev; + if (hdev->scanning_paused) { + bt_dev_dbg(hdev, "Scanning is paused for suspend"); + return; + } + if (use_ext_scan(hdev)) { struct hci_cp_le_set_ext_scan_enable cp; @@ -670,15 +678,55 @@ void hci_req_add_le_scan_disable(struct hci_request *req) } } -static void add_to_white_list(struct hci_request *req, - struct hci_conn_params *params) +static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr, + u8 bdaddr_type) +{ + struct hci_cp_le_del_from_white_list cp; + + cp.bdaddr_type = bdaddr_type; + bacpy(&cp.bdaddr, bdaddr); + + bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from whitelist", &cp.bdaddr, + cp.bdaddr_type); + hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, sizeof(cp), &cp); +} + +/* Adds connection to white list if needed. On error, returns -1. */ +static int add_to_white_list(struct hci_request *req, + struct hci_conn_params *params, u8 *num_entries, + bool allow_rpa) { struct hci_cp_le_add_to_white_list cp; + struct hci_dev *hdev = req->hdev; + + /* Already in white list */ + if (hci_bdaddr_list_lookup(&hdev->le_white_list, ¶ms->addr, + params->addr_type)) + return 0; + + /* Select filter policy to accept all advertising */ + if (*num_entries >= hdev->le_white_list_size) + return -1; + + /* White list can not be used with RPAs */ + if (!allow_rpa && + hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) { + return -1; + } + /* During suspend, only wakeable devices can be in whitelist */ + if (hdev->suspended && !params->wakeable) + return 0; + + *num_entries += 1; cp.bdaddr_type = params->addr_type; bacpy(&cp.bdaddr, ¶ms->addr); + bt_dev_dbg(hdev, "Add %pMR (0x%x) to whitelist", &cp.bdaddr, + cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); + + return 0; } static u8 update_white_list(struct hci_request *req) @@ -686,7 +734,14 @@ static u8 update_white_list(struct hci_request *req) struct hci_dev *hdev = req->hdev; struct hci_conn_params *params; struct bdaddr_list *b; - uint8_t white_list_entries = 0; + u8 num_entries = 0; + bool pend_conn, pend_report; + /* We allow whitelisting even with RPAs in suspend. In the worst case, + * we won't be able to wake from devices that use the privacy1.2 + * features. Additionally, once we support privacy1.2 and IRK + * offloading, we can update this to also check for those conditions. + */ + bool allow_rpa = hdev->suspended; /* Go through the current white list programmed into the * controller one by one and check if that address is still @@ -695,29 +750,28 @@ static u8 update_white_list(struct hci_request *req) * command to remove it from the controller. */ list_for_each_entry(b, &hdev->le_white_list, list) { - /* If the device is neither in pend_le_conns nor - * pend_le_reports then remove it from the whitelist. + pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, + b->bdaddr_type); + pend_report = hci_pend_le_action_lookup(&hdev->pend_le_reports, + &b->bdaddr, + b->bdaddr_type); + + /* If the device is not likely to connect or report, + * remove it from the whitelist. */ - if (!hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, b->bdaddr_type) && - !hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, b->bdaddr_type)) { - struct hci_cp_le_del_from_white_list cp; - - cp.bdaddr_type = b->bdaddr_type; - bacpy(&cp.bdaddr, &b->bdaddr); - - hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, - sizeof(cp), &cp); + if (!pend_conn && !pend_report) { + del_from_white_list(req, &b->bdaddr, b->bdaddr_type); continue; } - if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { - /* White list can not be used with RPAs */ + /* White list can not be used with RPAs */ + if (!allow_rpa && + hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { return 0x00; } - white_list_entries++; + num_entries++; } /* Since all no longer valid white list entries have been @@ -731,47 +785,17 @@ static u8 update_white_list(struct hci_request *req) * white list. */ list_for_each_entry(params, &hdev->pend_le_conns, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ + if (add_to_white_list(req, params, &num_entries, allow_rpa)) return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); } /* After adding all new pending connections, walk through * the list of pending reports and also add these to the - * white list if there is still space. + * white list if there is still space. Abort if space runs out. */ list_for_each_entry(params, &hdev->pend_le_reports, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ + if (add_to_white_list(req, params, &num_entries, allow_rpa)) return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); } /* Select filter policy to use white list */ @@ -866,6 +890,12 @@ void hci_req_add_le_passive_scan(struct hci_request *req) struct hci_dev *hdev = req->hdev; u8 own_addr_type; u8 filter_policy; + u8 window, interval; + + if (hdev->scanning_paused) { + bt_dev_dbg(hdev, "Scanning is paused for suspend"); + return; + } /* Set require_privacy to false since no SCAN_REQ are send * during passive scanning. Not using an non-resolvable address @@ -896,8 +926,17 @@ void hci_req_add_le_passive_scan(struct hci_request *req) (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) filter_policy |= 0x02; - hci_req_start_scan(req, LE_SCAN_PASSIVE, hdev->le_scan_interval, - hdev->le_scan_window, own_addr_type, filter_policy); + if (hdev->suspended) { + window = LE_SUSPEND_SCAN_WINDOW; + interval = LE_SUSPEND_SCAN_INTERVAL; + } else { + window = hdev->le_scan_window; + interval = hdev->le_scan_interval; + } + + bt_dev_dbg(hdev, "LE passive scan with whitelist = %d", filter_policy); + hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window, + own_addr_type, filter_policy); } static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance) @@ -918,6 +957,187 @@ static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance) return adv_instance->scan_rsp_len; } +static void hci_req_clear_event_filter(struct hci_request *req) +{ + struct hci_cp_set_event_filter f; + + memset(&f, 0, sizeof(f)); + f.flt_type = HCI_FLT_CLEAR_ALL; + hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &f); + + /* Update page scan state (since we may have modified it when setting + * the event filter). + */ + __hci_req_update_scan(req); +} + +static void hci_req_set_event_filter(struct hci_request *req) +{ + struct bdaddr_list *b; + struct hci_cp_set_event_filter f; + struct hci_dev *hdev = req->hdev; + u8 scan; + + /* Always clear event filter when starting */ + hci_req_clear_event_filter(req); + + list_for_each_entry(b, &hdev->wakeable, list) { + memset(&f, 0, sizeof(f)); + bacpy(&f.addr_conn_flt.bdaddr, &b->bdaddr); + f.flt_type = HCI_FLT_CONN_SETUP; + f.cond_type = HCI_CONN_SETUP_ALLOW_BDADDR; + f.addr_conn_flt.auto_accept = HCI_CONN_SETUP_AUTO_ON; + + bt_dev_dbg(hdev, "Adding event filters for %pMR", &b->bdaddr); + hci_req_add(req, HCI_OP_SET_EVENT_FLT, sizeof(f), &f); + } + + scan = !list_empty(&hdev->wakeable) ? SCAN_PAGE : SCAN_DISABLED; + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +} + +static void hci_req_config_le_suspend_scan(struct hci_request *req) +{ + /* Can't change params without disabling first */ + hci_req_add_le_scan_disable(req); + + /* Configure params and enable scanning */ + hci_req_add_le_passive_scan(req); + + /* Block suspend notifier on response */ + set_bit(SUSPEND_SCAN_ENABLE, req->hdev->suspend_tasks); +} + +static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + bt_dev_dbg(hdev, "Request complete opcode=0x%x, status=0x%x", opcode, + status); + if (test_and_clear_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks) || + test_and_clear_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks)) { + wake_up(&hdev->suspend_wait_q); + } +} + +/* Call with hci_dev_lock */ +void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) +{ + int old_state; + struct hci_conn *conn; + struct hci_request req; + u8 page_scan; + int disconnect_counter; + + if (next == hdev->suspend_state) { + bt_dev_dbg(hdev, "Same state before and after: %d", next); + goto done; + } + + hdev->suspend_state = next; + hci_req_init(&req, hdev); + + if (next == BT_SUSPEND_DISCONNECT) { + /* Mark device as suspended */ + hdev->suspended = true; + + /* Pause discovery if not already stopped */ + old_state = hdev->discovery.state; + if (old_state != DISCOVERY_STOPPED) { + set_bit(SUSPEND_PAUSE_DISCOVERY, hdev->suspend_tasks); + hci_discovery_set_state(hdev, DISCOVERY_STOPPING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + } + + hdev->discovery_paused = true; + hdev->discovery_old_state = old_state; + + /* Stop advertising */ + old_state = hci_dev_test_flag(hdev, HCI_ADVERTISING); + if (old_state) { + set_bit(SUSPEND_PAUSE_ADVERTISING, hdev->suspend_tasks); + cancel_delayed_work(&hdev->discov_off); + queue_delayed_work(hdev->req_workqueue, + &hdev->discov_off, 0); + } + + hdev->advertising_paused = true; + hdev->advertising_old_state = old_state; + /* Disable page scan */ + page_scan = SCAN_DISABLED; + hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &page_scan); + + /* Disable LE passive scan */ + hci_req_add_le_scan_disable(&req); + + /* Mark task needing completion */ + set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); + + /* Prevent disconnects from causing scanning to be re-enabled */ + hdev->scanning_paused = true; + + /* Run commands before disconnecting */ + hci_req_run(&req, suspend_req_complete); + + disconnect_counter = 0; + /* Soft disconnect everything (power off) */ + list_for_each_entry(conn, &hdev->conn_hash.list, list) { + hci_disconnect(conn, HCI_ERROR_REMOTE_POWER_OFF); + disconnect_counter++; + } + + if (disconnect_counter > 0) { + bt_dev_dbg(hdev, + "Had %d disconnects. Will wait on them", + disconnect_counter); + set_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks); + } + } else if (next == BT_SUSPEND_COMPLETE) { + /* Unpause to take care of updating scanning params */ + hdev->scanning_paused = false; + /* Enable event filter for paired devices */ + hci_req_set_event_filter(&req); + /* Enable passive scan at lower duty cycle */ + hci_req_config_le_suspend_scan(&req); + /* Pause scan changes again. */ + hdev->scanning_paused = true; + hci_req_run(&req, suspend_req_complete); + } else { + hdev->suspended = false; + hdev->scanning_paused = false; + + hci_req_clear_event_filter(&req); + /* Reset passive/background scanning to normal */ + hci_req_config_le_suspend_scan(&req); + + /* Unpause advertising */ + hdev->advertising_paused = false; + if (hdev->advertising_old_state) { + set_bit(SUSPEND_UNPAUSE_ADVERTISING, + hdev->suspend_tasks); + hci_dev_set_flag(hdev, HCI_ADVERTISING); + queue_work(hdev->req_workqueue, + &hdev->discoverable_update); + hdev->advertising_old_state = 0; + } + + /* Unpause discovery */ + hdev->discovery_paused = false; + if (hdev->discovery_old_state != DISCOVERY_STOPPED && + hdev->discovery_old_state != DISCOVERY_STOPPING) { + set_bit(SUSPEND_UNPAUSE_DISCOVERY, hdev->suspend_tasks); + hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + } + + hci_req_run(&req, suspend_req_complete); + } + + hdev->suspend_state = next; + +done: + clear_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + wake_up(&hdev->suspend_wait_q); +} + static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) { u8 instance = hdev->cur_adv_instance; @@ -1499,7 +1719,7 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); if (err < 0) { - BT_ERR("%s failed to generate new RPA", hdev->name); + bt_dev_err(hdev, "failed to generate new RPA"); return err; } @@ -2015,6 +2235,9 @@ void __hci_req_update_scan(struct hci_request *req) if (mgmt_powering_down(hdev)) return; + if (hdev->scanning_paused) + return; + if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) || disconnected_whitelist_entries(hdev)) scan = SCAN_PAGE; @@ -2504,23 +2727,6 @@ static int active_scan(struct hci_request *req, unsigned long opt) BT_DBG("%s", hdev->name); - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { - hci_dev_lock(hdev); - - /* Don't let discovery abort an outgoing connection attempt - * that's using directed advertising. - */ - if (hci_lookup_le_connect(hdev)) { - hci_dev_unlock(hdev); - return -EBUSY; - } - - cancel_adv_timeout(hdev); - hci_dev_unlock(hdev); - - __hci_req_disable_advertising(req); - } - /* If controller is scanning, it means the background scanning is * running. Thus, we should temporarily stop it in order to set the * discovery scanning parameters. diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index a7019fbeadd3..0e81614d235e 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -68,6 +68,8 @@ void __hci_req_update_eir(struct hci_request *req); void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); +void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); + void hci_req_reenable_advertising(struct hci_dev *hdev); void __hci_req_enable_advertising(struct hci_request *req); void __hci_req_disable_advertising(struct hci_request *req); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index bef84b95e2c4..3b4fa27a44e6 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1279,7 +1279,7 @@ static int hidp_session_thread(void *arg) add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait); /* This memory barrier is paired with wq_has_sleeper(). See * sock_poll_wait() for more information why this is needed. */ - smp_mb(); + smp_mb__before_atomic(); /* notify synchronous startup that we're ready */ atomic_inc(&session->state); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 195459a1e53e..8b0fca39989d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -45,6 +45,7 @@ #define LE_FLOWCTL_MAX_CREDITS 65535 bool disable_ertm; +bool enable_ecred; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN | L2CAP_FEAT_UCD; @@ -419,6 +420,9 @@ static void l2cap_chan_timeout(struct work_struct *work) BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); mutex_lock(&conn->chan_lock); + /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling + * this work. No need to call l2cap_chan_hold(chan) here again. + */ l2cap_chan_lock(chan); if (chan->state == BT_CONNECTED || chan->state == BT_CONFIG) @@ -431,12 +435,12 @@ static void l2cap_chan_timeout(struct work_struct *work) l2cap_chan_close(chan, reason); - l2cap_chan_unlock(chan); - chan->ops->close(chan); - mutex_unlock(&conn->chan_lock); + l2cap_chan_unlock(chan); l2cap_chan_put(chan); + + mutex_unlock(&conn->chan_lock); } struct l2cap_chan *l2cap_chan_create(void) @@ -532,6 +536,17 @@ static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits) skb_queue_head_init(&chan->tx_q); } +static void l2cap_ecred_init(struct l2cap_chan *chan, u16 tx_credits) +{ + l2cap_le_flowctl_init(chan, tx_credits); + + /* L2CAP implementations shall support a minimum MPS of 64 octets */ + if (chan->mps < L2CAP_ECRED_MIN_MPS) { + chan->mps = L2CAP_ECRED_MIN_MPS; + chan->rx_credits = (chan->imtu / chan->mps) + 1; + } +} + void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, @@ -638,6 +653,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) break; case L2CAP_MODE_LE_FLOWCTL: + case L2CAP_MODE_EXT_FLOWCTL: skb_queue_purge(&chan->tx_q); break; @@ -704,6 +720,27 @@ static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) &rsp); } +static void l2cap_chan_ecred_connect_reject(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct l2cap_ecred_conn_rsp rsp; + u16 result; + + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) + result = L2CAP_CR_LE_AUTHORIZATION; + else + result = L2CAP_CR_LE_BAD_PSM; + + l2cap_state_change(chan, BT_DISCONN); + + memset(&rsp, 0, sizeof(rsp)); + + rsp.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), + &rsp); +} + static void l2cap_chan_connect_reject(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; @@ -749,8 +786,16 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED) { if (conn->hcon->type == ACL_LINK) l2cap_chan_connect_reject(chan); - else if (conn->hcon->type == LE_LINK) - l2cap_chan_le_connect_reject(chan); + else if (conn->hcon->type == LE_LINK) { + switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + l2cap_chan_le_connect_reject(chan); + break; + case L2CAP_MODE_EXT_FLOWCTL: + l2cap_chan_ecred_connect_reject(chan); + break; + } + } } l2cap_chan_del(chan, reason); @@ -1273,8 +1318,13 @@ static void l2cap_chan_ready(struct l2cap_chan *chan) chan->conf_state = 0; __clear_chan_timer(chan); - if (chan->mode == L2CAP_MODE_LE_FLOWCTL && !chan->tx_credits) - chan->ops->suspend(chan); + switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + case L2CAP_MODE_EXT_FLOWCTL: + if (!chan->tx_credits) + chan->ops->suspend(chan); + break; + } chan->state = BT_CONNECTED; @@ -1306,6 +1356,31 @@ static void l2cap_le_connect(struct l2cap_chan *chan) sizeof(req), &req); } +static void l2cap_ecred_connect(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct { + struct l2cap_ecred_conn_req req; + __le16 scid; + } __packed pdu; + + if (test_and_set_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + return; + + l2cap_ecred_init(chan, 0); + + pdu.req.psm = chan->psm; + pdu.req.mtu = cpu_to_le16(chan->imtu); + pdu.req.mps = cpu_to_le16(chan->mps); + pdu.req.credits = cpu_to_le16(chan->rx_credits); + pdu.scid = cpu_to_le16(chan->scid); + + chan->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_CONN_REQ, + sizeof(pdu), &pdu); +} + static void l2cap_le_start(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; @@ -1318,8 +1393,12 @@ static void l2cap_le_start(struct l2cap_chan *chan) return; } - if (chan->state == BT_CONNECT) - l2cap_le_connect(chan); + if (chan->state == BT_CONNECT) { + if (chan->mode == L2CAP_MODE_EXT_FLOWCTL) + l2cap_ecred_connect(chan); + else + l2cap_le_connect(chan); + } } static void l2cap_start_connection(struct l2cap_chan *chan) @@ -1737,9 +1816,9 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_del(chan, err); - l2cap_chan_unlock(chan); - chan->ops->close(chan); + + l2cap_chan_unlock(chan); l2cap_chan_put(chan); } @@ -2505,6 +2584,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: + case L2CAP_MODE_EXT_FLOWCTL: /* Check outgoing MTU */ if (len > chan->omtu) return -EMSGSIZE; @@ -3773,6 +3853,45 @@ void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) &rsp); } +void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan) +{ + struct { + struct l2cap_ecred_conn_rsp rsp; + __le16 dcid[5]; + } __packed pdu; + struct l2cap_conn *conn = chan->conn; + u16 ident = chan->ident; + int i = 0; + + if (!ident) + return; + + BT_DBG("chan %p ident %d", chan, ident); + + pdu.rsp.mtu = cpu_to_le16(chan->imtu); + pdu.rsp.mps = cpu_to_le16(chan->mps); + pdu.rsp.credits = cpu_to_le16(chan->rx_credits); + pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); + + mutex_lock(&conn->chan_lock); + + list_for_each_entry(chan, &conn->chan_l, list) { + if (chan->ident != ident) + continue; + + /* Reset ident so only one response is sent */ + chan->ident = 0; + + /* Include all channels pending with the same ident */ + pdu.dcid[i++] = cpu_to_le16(chan->scid); + } + + mutex_unlock(&conn->chan_lock); + + l2cap_send_cmd(conn, ident, L2CAP_ECRED_CONN_RSP, + sizeof(pdu.rsp) + i * sizeof(__le16), &pdu); +} + void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) { struct l2cap_conn_rsp rsp; @@ -4181,7 +4300,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, return 0; } - if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2) { + if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2 && + chan->state != BT_CONNECTED) { cmd_reject_invalid_cid(conn, cmd->ident, chan->scid, chan->dcid); goto unlock; @@ -4405,6 +4525,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, return 0; } + l2cap_chan_hold(chan); l2cap_chan_lock(chan); rsp.dcid = cpu_to_le16(chan->scid); @@ -4413,12 +4534,11 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, chan->ops->set_shutdown(chan); - l2cap_chan_hold(chan); l2cap_chan_del(chan, ECONNRESET); - l2cap_chan_unlock(chan); - chan->ops->close(chan); + + l2cap_chan_unlock(chan); l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); @@ -4450,20 +4570,21 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, return 0; } + l2cap_chan_hold(chan); l2cap_chan_lock(chan); if (chan->state != BT_DISCONN) { l2cap_chan_unlock(chan); + l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); return 0; } - l2cap_chan_hold(chan); l2cap_chan_del(chan, 0); - l2cap_chan_unlock(chan); - chan->ops->close(chan); + + l2cap_chan_unlock(chan); l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); @@ -5714,6 +5835,356 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, return 0; } +static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_ecred_conn_req *req = (void *) data; + struct { + struct l2cap_ecred_conn_rsp rsp; + __le16 dcid[5]; + } __packed pdu; + struct l2cap_chan *chan, *pchan; + u16 mtu, mps; + __le16 psm; + u8 result, len = 0; + int i, num_scid; + bool defer = false; + + if (!enable_ecred) + return -EINVAL; + + if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { + result = L2CAP_CR_LE_INVALID_PARAMS; + goto response; + } + + mtu = __le16_to_cpu(req->mtu); + mps = __le16_to_cpu(req->mps); + + if (mtu < L2CAP_ECRED_MIN_MTU || mps < L2CAP_ECRED_MIN_MPS) { + result = L2CAP_CR_LE_UNACCEPT_PARAMS; + goto response; + } + + psm = req->psm; + + BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); + + memset(&pdu, 0, sizeof(pdu)); + + /* Check if we have socket listening on psm */ + pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, + &conn->hcon->dst, LE_LINK); + if (!pchan) { + result = L2CAP_CR_LE_BAD_PSM; + goto response; + } + + mutex_lock(&conn->chan_lock); + l2cap_chan_lock(pchan); + + if (!smp_sufficient_security(conn->hcon, pchan->sec_level, + SMP_ALLOW_STK)) { + result = L2CAP_CR_LE_AUTHENTICATION; + goto unlock; + } + + result = L2CAP_CR_LE_SUCCESS; + cmd_len -= sizeof(req); + num_scid = cmd_len / sizeof(u16); + + for (i = 0; i < num_scid; i++) { + u16 scid = __le16_to_cpu(req->scid[i]); + + BT_DBG("scid[%d] 0x%4.4x", i, scid); + + pdu.dcid[i] = 0x0000; + len += sizeof(*pdu.dcid); + + /* Check for valid dynamic CID range */ + if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { + result = L2CAP_CR_LE_INVALID_SCID; + continue; + } + + /* Check if we already have channel with that dcid */ + if (__l2cap_get_chan_by_dcid(conn, scid)) { + result = L2CAP_CR_LE_SCID_IN_USE; + continue; + } + + chan = pchan->ops->new_connection(pchan); + if (!chan) { + result = L2CAP_CR_LE_NO_MEM; + continue; + } + + bacpy(&chan->src, &conn->hcon->src); + bacpy(&chan->dst, &conn->hcon->dst); + chan->src_type = bdaddr_src_type(conn->hcon); + chan->dst_type = bdaddr_dst_type(conn->hcon); + chan->psm = psm; + chan->dcid = scid; + chan->omtu = mtu; + chan->remote_mps = mps; + + __l2cap_chan_add(conn, chan); + + l2cap_ecred_init(chan, __le16_to_cpu(req->credits)); + + /* Init response */ + if (!pdu.rsp.credits) { + pdu.rsp.mtu = cpu_to_le16(chan->imtu); + pdu.rsp.mps = cpu_to_le16(chan->mps); + pdu.rsp.credits = cpu_to_le16(chan->rx_credits); + } + + pdu.dcid[i] = cpu_to_le16(chan->scid); + + __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); + + chan->ident = cmd->ident; + + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { + l2cap_state_change(chan, BT_CONNECT2); + defer = true; + chan->ops->defer(chan); + } else { + l2cap_chan_ready(chan); + } + } + +unlock: + l2cap_chan_unlock(pchan); + mutex_unlock(&conn->chan_lock); + l2cap_chan_put(pchan); + +response: + pdu.rsp.result = cpu_to_le16(result); + + if (defer) + return 0; + + l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_CONN_RSP, + sizeof(pdu.rsp) + len, &pdu); + + return 0; +} + +static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_ecred_conn_rsp *rsp = (void *) data; + struct hci_conn *hcon = conn->hcon; + u16 mtu, mps, credits, result; + struct l2cap_chan *chan; + int err = 0, sec_level; + int i = 0; + + if (cmd_len < sizeof(*rsp)) + return -EPROTO; + + mtu = __le16_to_cpu(rsp->mtu); + mps = __le16_to_cpu(rsp->mps); + credits = __le16_to_cpu(rsp->credits); + result = __le16_to_cpu(rsp->result); + + BT_DBG("mtu %u mps %u credits %u result 0x%4.4x", mtu, mps, credits, + result); + + mutex_lock(&conn->chan_lock); + + cmd_len -= sizeof(*rsp); + + list_for_each_entry(chan, &conn->chan_l, list) { + u16 dcid; + + if (chan->ident != cmd->ident || + chan->mode != L2CAP_MODE_EXT_FLOWCTL || + chan->state == BT_CONNECTED) + continue; + + l2cap_chan_lock(chan); + + /* Check that there is a dcid for each pending channel */ + if (cmd_len < sizeof(dcid)) { + l2cap_chan_del(chan, ECONNREFUSED); + l2cap_chan_unlock(chan); + continue; + } + + dcid = __le16_to_cpu(rsp->dcid[i++]); + cmd_len -= sizeof(u16); + + BT_DBG("dcid[%d] 0x%4.4x", i, dcid); + + /* Check if dcid is already in use */ + if (dcid && __l2cap_get_chan_by_dcid(conn, dcid)) { + /* If a device receives a + * L2CAP_CREDIT_BASED_CONNECTION_RSP packet with an + * already-assigned Destination CID, then both the + * original channel and the new channel shall be + * immediately discarded and not used. + */ + l2cap_chan_del(chan, ECONNREFUSED); + l2cap_chan_unlock(chan); + chan = __l2cap_get_chan_by_dcid(conn, dcid); + l2cap_chan_lock(chan); + l2cap_chan_del(chan, ECONNRESET); + l2cap_chan_unlock(chan); + continue; + } + + switch (result) { + case L2CAP_CR_LE_AUTHENTICATION: + case L2CAP_CR_LE_ENCRYPTION: + /* If we already have MITM protection we can't do + * anything. + */ + if (hcon->sec_level > BT_SECURITY_MEDIUM) { + l2cap_chan_del(chan, ECONNREFUSED); + break; + } + + sec_level = hcon->sec_level + 1; + if (chan->sec_level < sec_level) + chan->sec_level = sec_level; + + /* We'll need to send a new Connect Request */ + clear_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags); + + smp_conn_security(hcon, chan->sec_level); + break; + + case L2CAP_CR_LE_BAD_PSM: + l2cap_chan_del(chan, ECONNREFUSED); + break; + + default: + /* If dcid was not set it means channels was refused */ + if (!dcid) { + l2cap_chan_del(chan, ECONNREFUSED); + break; + } + + chan->ident = 0; + chan->dcid = dcid; + chan->omtu = mtu; + chan->remote_mps = mps; + chan->tx_credits = credits; + l2cap_chan_ready(chan); + break; + } + + l2cap_chan_unlock(chan); + } + + mutex_unlock(&conn->chan_lock); + + return err; +} + +static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_ecred_reconf_req *req = (void *) data; + struct l2cap_ecred_reconf_rsp rsp; + u16 mtu, mps, result; + struct l2cap_chan *chan; + int i, num_scid; + + if (!enable_ecred) + return -EINVAL; + + if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { + result = L2CAP_CR_LE_INVALID_PARAMS; + goto respond; + } + + mtu = __le16_to_cpu(req->mtu); + mps = __le16_to_cpu(req->mps); + + BT_DBG("mtu %u mps %u", mtu, mps); + + if (mtu < L2CAP_ECRED_MIN_MTU) { + result = L2CAP_RECONF_INVALID_MTU; + goto respond; + } + + if (mps < L2CAP_ECRED_MIN_MPS) { + result = L2CAP_RECONF_INVALID_MPS; + goto respond; + } + + cmd_len -= sizeof(*req); + num_scid = cmd_len / sizeof(u16); + result = L2CAP_RECONF_SUCCESS; + + for (i = 0; i < num_scid; i++) { + u16 scid; + + scid = __le16_to_cpu(req->scid[i]); + if (!scid) + return -EPROTO; + + chan = __l2cap_get_chan_by_dcid(conn, scid); + if (!chan) + continue; + + /* If the MTU value is decreased for any of the included + * channels, then the receiver shall disconnect all + * included channels. + */ + if (chan->omtu > mtu) { + BT_ERR("chan %p decreased MTU %u -> %u", chan, + chan->omtu, mtu); + result = L2CAP_RECONF_INVALID_MTU; + } + + chan->omtu = mtu; + chan->remote_mps = mps; + } + +respond: + rsp.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_RECONF_RSP, sizeof(rsp), + &rsp); + + return 0; +} + +static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_chan *chan; + struct l2cap_ecred_conn_rsp *rsp = (void *) data; + u16 result; + + if (cmd_len < sizeof(*rsp)) + return -EPROTO; + + result = __le16_to_cpu(rsp->result); + + BT_DBG("result 0x%4.4x", rsp->result); + + if (!result) + return 0; + + list_for_each_entry(chan, &conn->chan_l, list) { + if (chan->ident != cmd->ident) + continue; + + l2cap_chan_del(chan, ECONNRESET); + } + + return 0; +} + static inline int l2cap_le_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) @@ -5769,6 +6240,22 @@ static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, err = l2cap_le_credits(conn, cmd, cmd_len, data); break; + case L2CAP_ECRED_CONN_REQ: + err = l2cap_ecred_conn_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_ECRED_CONN_RSP: + err = l2cap_ecred_conn_rsp(conn, cmd, cmd_len, data); + break; + + case L2CAP_ECRED_RECONF_REQ: + err = l2cap_ecred_reconf_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_ECRED_RECONF_RSP: + err = l2cap_ecred_reconf_rsp(conn, cmd, cmd_len, data); + break; + case L2CAP_DISCONN_REQ: err = l2cap_disconnect_req(conn, cmd, cmd_len, data); break; @@ -5831,9 +6318,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { struct hci_conn *hcon = conn->hcon; - u8 *data = skb->data; - int len = skb->len; - struct l2cap_cmd_hdr cmd; + struct l2cap_cmd_hdr *cmd; int err; l2cap_raw_recv(conn, skb); @@ -5841,35 +6326,34 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (hcon->type != ACL_LINK) goto drop; - while (len >= L2CAP_CMD_HDR_SIZE) { - u16 cmd_len; - memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE); - data += L2CAP_CMD_HDR_SIZE; - len -= L2CAP_CMD_HDR_SIZE; + while (skb->len >= L2CAP_CMD_HDR_SIZE) { + u16 len; + + cmd = (void *) skb->data; + skb_pull(skb, L2CAP_CMD_HDR_SIZE); - cmd_len = le16_to_cpu(cmd.len); + len = le16_to_cpu(cmd->len); - BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, - cmd.ident); + BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd->code, len, + cmd->ident); - if (cmd_len > len || !cmd.ident) { + if (len > skb->len || !cmd->ident) { BT_DBG("corrupted command"); break; } - err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data); + err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data); if (err) { struct l2cap_cmd_rej_unk rej; BT_ERR("Wrong link type (%d)", err); rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); - l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, + l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); } - data += cmd_len; - len -= cmd_len; + skb_pull(skb, len); } drop: @@ -6814,11 +7298,13 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) struct l2cap_le_credits pkt; u16 return_credits; - return_credits = ((chan->imtu / chan->mps) + 1) - chan->rx_credits; + return_credits = (chan->imtu / chan->mps) + 1; - if (!return_credits) + if (chan->rx_credits >= return_credits) return; + return_credits -= chan->rx_credits; + BT_DBG("chan %p returning %u credits to sender", chan, return_credits); chan->rx_credits += return_credits; @@ -6831,7 +7317,7 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt); } -static int l2cap_le_recv(struct l2cap_chan *chan, struct sk_buff *skb) +static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb) { int err; @@ -6846,7 +7332,7 @@ static int l2cap_le_recv(struct l2cap_chan *chan, struct sk_buff *skb) return err; } -static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) +static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) { int err; @@ -6894,7 +7380,7 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) } if (skb->len == sdu_len) - return l2cap_le_recv(chan, skb); + return l2cap_ecred_recv(chan, skb); chan->sdu = skb; chan->sdu_len = sdu_len; @@ -6926,7 +7412,7 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) skb = NULL; if (chan->sdu->len == chan->sdu_len) { - err = l2cap_le_recv(chan, chan->sdu); + err = l2cap_ecred_recv(chan, chan->sdu); if (!err) { chan->sdu = NULL; chan->sdu_last_frag = NULL; @@ -6987,7 +7473,8 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: - if (l2cap_le_data_rcv(chan, skb) < 0) + case L2CAP_MODE_EXT_FLOWCTL: + if (l2cap_ecred_data_rcv(chan, skb) < 0) goto drop; goto done; @@ -7214,8 +7701,8 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, struct hci_dev *hdev; int err; - BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst, - dst_type, __le16_to_cpu(psm)); + BT_DBG("%pMR -> %pMR (type %u) psm 0x%4.4x mode 0x%2.2x", &chan->src, + dst, dst_type, __le16_to_cpu(psm), chan->mode); hdev = hci_get_route(dst, &chan->src, chan->src_type); if (!hdev) @@ -7244,6 +7731,12 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, break; case L2CAP_MODE_LE_FLOWCTL: break; + case L2CAP_MODE_EXT_FLOWCTL: + if (!enable_ecred) { + err = -EOPNOTSUPP; + goto done; + } + break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: if (!disable_ertm) @@ -7368,6 +7861,38 @@ done: } EXPORT_SYMBOL_GPL(l2cap_chan_connect); +static void l2cap_ecred_reconfigure(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct { + struct l2cap_ecred_reconf_req req; + __le16 scid; + } pdu; + + pdu.req.mtu = cpu_to_le16(chan->imtu); + pdu.req.mps = cpu_to_le16(chan->mps); + pdu.scid = cpu_to_le16(chan->scid); + + chan->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_RECONF_REQ, + sizeof(pdu), &pdu); +} + +int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu) +{ + if (chan->imtu > mtu) + return -EINVAL; + + BT_DBG("chan %p mtu 0x%4.4x", chan, mtu); + + chan->imtu = mtu; + + l2cap_ecred_reconfigure(chan); + + return 0; +} + /* ---- L2CAP interface with lower layer (HCI) ---- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) @@ -7579,7 +8104,8 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); } else if (chan->state == BT_CONNECT2 && - chan->mode != L2CAP_MODE_LE_FLOWCTL) { + !(chan->mode == L2CAP_MODE_EXT_FLOWCTL || + chan->mode == L2CAP_MODE_LE_FLOWCTL)) { struct l2cap_conn_rsp rsp; __u16 res, stat; @@ -7787,3 +8313,6 @@ void l2cap_exit(void) module_param(disable_ertm, bool, 0644); MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); + +module_param(enable_ecred, bool, 0644); +MODULE_PARM_DESC(enable_ecred, "Enable enhanced credit flow control mode"); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a7be8b59b3c2..40fb10b591bd 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -232,7 +232,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; } - if (chan->psm && bdaddr_type_is_le(chan->src_type)) + if (chan->psm && bdaddr_type_is_le(chan->src_type) && !chan->mode) chan->mode = L2CAP_MODE_LE_FLOWCTL; err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid), @@ -274,6 +274,12 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) case L2CAP_MODE_BASIC: case L2CAP_MODE_LE_FLOWCTL: break; + case L2CAP_MODE_EXT_FLOWCTL: + if (!enable_ecred) { + err = -EOPNOTSUPP; + goto done; + } + break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: if (!disable_ertm) @@ -427,6 +433,8 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, opts.max_tx = chan->max_tx; opts.txwin_size = chan->tx_win; + BT_DBG("mode 0x%2.2x", chan->mode); + len = min_t(unsigned int, len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) err = -EFAULT; @@ -499,6 +507,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct bt_security sec; struct bt_power pwr; + u32 phys; int len, err = 0; BT_DBG("sk %p", sk); @@ -603,6 +612,18 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, err = -EFAULT; break; + case BT_PHY: + if (sk->sk_state != BT_CONNECTED) { + err = -ENOTCONN; + break; + } + + phys = hci_conn_get_phy(chan->conn->hcon); + + if (put_user(phys, (u32 __user *) optval)) + err = -EFAULT; + break; + default: err = -ENOPROTOOPT; break; @@ -694,6 +715,8 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, break; } + BT_DBG("mode 0x%2.2x", chan->mode); + chan->imtu = opts.imtu; chan->omtu = opts.omtu; chan->fcs = opts.fcs; @@ -926,7 +949,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (sk->sk_state == BT_CONNECTED) { + if (chan->mode == L2CAP_MODE_LE_FLOWCTL && + sk->sk_state == BT_CONNECTED) { err = -EISCONN; break; } @@ -936,7 +960,12 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - chan->imtu = opt; + if (chan->mode == L2CAP_MODE_EXT_FLOWCTL && + sk->sk_state == BT_CONNECTED) + err = l2cap_chan_reconfigure(chan, opt); + else + chan->imtu = opt; + break; default: @@ -991,7 +1020,11 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (sk->sk_state == BT_CONNECT2 && test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { - if (bdaddr_type_is_le(pi->chan->src_type)) { + if (pi->chan->mode == L2CAP_MODE_EXT_FLOWCTL) { + sk->sk_state = BT_CONNECTED; + pi->chan->state = BT_CONNECTED; + __l2cap_ecred_conn_rsp_defer(pi->chan); + } else if (bdaddr_type_is_le(pi->chan->src_type)) { sk->sk_state = BT_CONNECTED; pi->chan->state = BT_CONNECTED; __l2cap_le_connect_rsp_defer(pi->chan); @@ -1042,7 +1075,7 @@ done: } /* Kill socket (only if zapped and orphan) - * Must be called on unlocked socket. + * Must be called on unlocked socket, with l2cap channel lock. */ static void l2cap_sock_kill(struct sock *sk) { @@ -1193,6 +1226,7 @@ static int l2cap_sock_release(struct socket *sock) { struct sock *sk = sock->sk; int err; + struct l2cap_chan *chan; BT_DBG("sock %p, sk %p", sock, sk); @@ -1202,9 +1236,17 @@ static int l2cap_sock_release(struct socket *sock) bt_sock_unlink(&l2cap_sk_list, sk); err = l2cap_sock_shutdown(sock, 2); + chan = l2cap_pi(sk)->chan; + + l2cap_chan_hold(chan); + l2cap_chan_lock(chan); sock_orphan(sk); l2cap_sock_kill(sk); + + l2cap_chan_unlock(chan); + l2cap_chan_put(chan); + return err; } @@ -1222,12 +1264,15 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) BT_DBG("child chan %p state %s", chan, state_to_string(chan->state)); + l2cap_chan_hold(chan); l2cap_chan_lock(chan); + __clear_chan_timer(chan); l2cap_chan_close(chan, ECONNRESET); - l2cap_chan_unlock(chan); - l2cap_sock_kill(sk); + + l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3074363c68df..6552003a170e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,7 +38,7 @@ #include "mgmt_util.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 15 +#define MGMT_REVISION 16 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -107,6 +107,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_READ_EXT_INFO, MGMT_OP_SET_APPEARANCE, MGMT_OP_SET_BLOCKED_KEYS, + MGMT_OP_SET_WIDEBAND_SPEECH, }; static const u16 mgmt_events[] = { @@ -762,6 +763,10 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; + + if (test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, + &hdev->quirks)) + settings |= MGMT_SETTING_WIDEBAND_SPEECH; } if (lmp_le_capable(hdev)) { @@ -846,6 +851,9 @@ static u32 get_current_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_STATIC_ADDRESS; } + if (hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED)) + settings |= MGMT_SETTING_WIDEBAND_SPEECH; + return settings; } @@ -1382,6 +1390,12 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } + if (hdev->advertising_paused) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_BUSY); + goto failed; + } + if (!hdev_is_powered(hdev)) { bool changed = false; @@ -3589,6 +3603,62 @@ static int set_blocked_keys(struct sock *sk, struct hci_dev *hdev, void *data, err, NULL, 0); } +static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_mode *cp = data; + int err; + bool changed = false; + + BT_DBG("request for %s", hdev->name); + + if (!test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks)) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_NOT_SUPPORTED); + + if (cp->val != 0x00 && cp->val != 0x01) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + if (pending_find(MGMT_OP_SET_WIDEBAND_SPEECH, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_BUSY); + goto unlock; + } + + if (hdev_is_powered(hdev) && + !!cp->val != hci_dev_test_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED)) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_REJECTED); + goto unlock; + } + + if (cp->val) + changed = !hci_dev_test_and_set_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED); + else + changed = hci_dev_test_and_clear_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED); + + err = send_settings_rsp(sk, MGMT_OP_SET_WIDEBAND_SPEECH, hdev); + if (err < 0) + goto unlock; + + if (changed) + err = new_settings(hdev, sk); + +unlock: + hci_dev_unlock(hdev); + return err; +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -3865,6 +3935,13 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) } hci_dev_unlock(hdev); + + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_UNPAUSE_DISCOVERY, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Unpaused discovery"); + wake_up(&hdev->suspend_wait_q); + } } static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, @@ -3926,6 +4003,13 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, goto failed; } + /* Can't start discovery when it is paused */ + if (hdev->discovery_paused) { + err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_BUSY, + &cp->type, sizeof(cp->type)); + goto failed; + } + /* Clear the discovery filter first to free any previously * allocated memory for the UUID list. */ @@ -4093,6 +4177,12 @@ void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status) } hci_dev_unlock(hdev); + + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_PAUSE_DISCOVERY, hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Paused discovery"); + wake_up(&hdev->suspend_wait_q); + } } static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, @@ -4324,6 +4414,17 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, if (match.sk) sock_put(match.sk); + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_PAUSE_ADVERTISING, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Paused advertising"); + wake_up(&hdev->suspend_wait_q); + } else if (test_and_clear_bit(SUSPEND_UNPAUSE_ADVERTISING, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Unpaused advertising"); + wake_up(&hdev->suspend_wait_q); + } + /* If "Set Advertising" was just disabled and instance advertising was * set up earlier, then re-enable multi-instance advertising. */ @@ -4375,6 +4476,10 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); + if (hdev->advertising_paused) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_BUSY); + hci_dev_lock(hdev); val = !!cp->val; @@ -6743,8 +6848,11 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, if (!err) err = hci_req_run(&req, add_advertising_complete); - if (err < 0) + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_FAILED); mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); @@ -6990,6 +7098,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { set_phy_configuration, MGMT_SET_PHY_CONFIGURATION_SIZE }, { set_blocked_keys, MGMT_OP_SET_BLOCKED_KEYS_SIZE, HCI_MGMT_VAR_LEN }, + { set_wideband_speech, MGMT_SETTING_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 3a9e9d9670be..2e20af317cea 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -40,7 +40,6 @@ static bool disable_cfc; static bool l2cap_ertm; static int channel_mtu = -1; -static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; static struct task_struct *rfcomm_thread; @@ -73,8 +72,6 @@ static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s); /* ---- RFCOMM frame parsing macros ---- */ #define __get_dlci(b) ((b & 0xfc) >> 2) -#define __get_channel(b) ((b & 0xf8) >> 3) -#define __get_dir(b) ((b & 0x04) >> 2) #define __get_type(b) ((b & 0xef)) #define __test_ea(b) ((b & 0x01)) @@ -87,7 +84,6 @@ static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s); #define __ctrl(type, pf) (((type & 0xef) | (pf << 4))) #define __dlci(dir, chn) (((chn & 0x1f) << 1) | dir) #define __srv_channel(dlci) (dlci >> 1) -#define __dir(dlci) (dlci & 0x01) #define __len8(len) (((len) << 1) | 1) #define __len16(len) ((len) << 1) @@ -752,7 +748,8 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->chan->imtu = l2cap_mtu; + /* Set MTU to 0 so L2CAP can auto select the MTU */ + l2cap_pi(sk)->chan->imtu = 0; l2cap_pi(sk)->chan->sec_level = sec_level; if (l2cap_ertm) l2cap_pi(sk)->chan->mode = L2CAP_MODE_ERTM; @@ -2039,7 +2036,8 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->chan->imtu = l2cap_mtu; + /* Set MTU to 0 so L2CAP can auto select the MTU */ + l2cap_pi(sk)->chan->imtu = 0; release_sock(sk); /* Start listening on the socket */ @@ -2237,9 +2235,6 @@ MODULE_PARM_DESC(disable_cfc, "Disable credit based flow control"); module_param(channel_mtu, int, 0644); MODULE_PARM_DESC(channel_mtu, "Default MTU for the RFCOMM channel"); -module_param(l2cap_mtu, uint, 0644); -MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection"); - module_param(l2cap_ertm, bool, 0644); MODULE_PARM_DESC(l2cap_ertm, "Use L2CAP ERTM mode for connection"); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 0c7d31c6c18c..a58584949a95 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -413,10 +413,8 @@ static int __rfcomm_create_dev(struct sock *sk, void __user *arg) dlc = rfcomm_dlc_exists(&req.src, &req.dst, req.channel); if (IS_ERR(dlc)) return PTR_ERR(dlc); - else if (dlc) { - rfcomm_dlc_put(dlc); + if (dlc) return -EBUSY; - } dlc = rfcomm_dlc_alloc(GFP_KERNEL); if (!dlc) return -ENOMEM; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b91d6b440fdf..c8c3d38cdc7b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -922,6 +922,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; int len, err = 0; struct bt_voice voice; + u32 phys; BT_DBG("sk %p", sk); @@ -956,6 +957,18 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, break; + case BT_PHY: + if (sk->sk_state != BT_CONNECTED) { + err = -ENOTCONN; + break; + } + + phys = hci_conn_get_phy(sco_pi(sk)->conn->hcon); + + if (put_user(phys, (u32 __user *) optval)) + err = -EFAULT; + break; + default: err = -ENOPROTOOPT; break; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 204f14f8b507..1476a91ce935 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1145,7 +1145,7 @@ static void sc_generate_link_key(struct smp_chan *smp) return; if (test_bit(SMP_FLAG_CT2, &smp->flags)) { - /* SALT = 0x00000000000000000000000000000000746D7031 */ + /* SALT = 0x000000000000000000000000746D7031 */ const u8 salt[16] = { 0x31, 0x70, 0x6d, 0x74 }; if (smp_h7(smp->tfm_cmac, smp->tk, salt, smp->link_key)) { @@ -1203,7 +1203,7 @@ static void sc_generate_ltk(struct smp_chan *smp) set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); if (test_bit(SMP_FLAG_CT2, &smp->flags)) { - /* SALT = 0x00000000000000000000000000000000746D7032 */ + /* SALT = 0x000000000000000000000000746D7032 */ const u8 salt[16] = { 0x32, 0x70, 0x6d, 0x74 }; if (smp_h7(smp->tfm_cmac, key->val, salt, smp->tk)) @@ -2115,7 +2115,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) struct l2cap_chan *chan = conn->smp; struct smp_chan *smp = chan->data; struct hci_conn *hcon = conn->hcon; - u8 *pkax, *pkbx, *na, *nb; + u8 *pkax, *pkbx, *na, *nb, confirm_hint; u32 passkey; int err; @@ -2168,6 +2168,24 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); + + /* Only Just-Works pairing requires extra checks */ + if (smp->method != JUST_WORKS) + goto mackey_and_ltk; + + /* If there already exists long term key in local host, leave + * the decision to user space since the remote device could + * be legitimate or malicious. + */ + if (hci_find_ltk(hcon->hdev, &hcon->dst, hcon->dst_type, + hcon->role)) { + /* Set passkey to 0. The value can be any number since + * it'll be ignored anyway. + */ + passkey = 0; + confirm_hint = 1; + goto confirm; + } } mackey_and_ltk: @@ -2188,8 +2206,11 @@ mackey_and_ltk: if (err) return SMP_UNSPECIFIED; + confirm_hint = 0; + +confirm: err = mgmt_user_confirm_request(hcon->hdev, &hcon->dst, hcon->type, - hcon->dst_type, passkey, 0); + hcon->dst_type, passkey, confirm_hint); if (err) return SMP_UNSPECIFIED; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 562443f94133..4c921f5154e0 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -10,6 +10,7 @@ #include <net/bpf_sk_storage.h> #include <net/sock.h> #include <net/tcp.h> +#include <linux/error-injection.h> #define CREATE_TRACE_POINTS #include <trace/events/bpf_test_run.h> @@ -143,6 +144,14 @@ int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) return a + (long)b + c + d + (long)e + f; } +int noinline bpf_modify_return_test(int a, int *b) +{ + *b += 1; + return a + *b; +} + +ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); + static void *bpf_test_init(const union bpf_attr *kattr, u32 size, u32 headroom, u32 tailroom) { @@ -160,18 +169,48 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, kfree(data); return ERR_PTR(-EFAULT); } - if (bpf_fentry_test1(1) != 2 || - bpf_fentry_test2(2, 3) != 5 || - bpf_fentry_test3(4, 5, 6) != 15 || - bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || - bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || - bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) { - kfree(data); - return ERR_PTR(-EFAULT); - } + return data; } +int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + u16 side_effect = 0, ret = 0; + int b = 2, err = -EFAULT; + u32 retval = 0; + + switch (prog->expected_attach_type) { + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + if (bpf_fentry_test1(1) != 2 || + bpf_fentry_test2(2, 3) != 5 || + bpf_fentry_test3(4, 5, 6) != 15 || + bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || + bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) + goto out; + break; + case BPF_MODIFY_RETURN: + ret = bpf_modify_return_test(1, &b); + if (b != 2) + side_effect = 1; + break; + default: + goto out; + } + + retval = ((u32)side_effect << 16) | ret; + if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) + goto out; + + err = 0; +out: + trace_bpf_test_finish(&err); + return err; +} + static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) { void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); @@ -277,6 +316,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) /* gso_segs is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs), + offsetof(struct __sk_buff, gso_size))) + return -EINVAL; + + /* gso_size is allowed */ + + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size), sizeof(struct __sk_buff))) return -EINVAL; @@ -297,6 +342,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) if (__skb->gso_segs > GSO_MAX_SEGS) return -EINVAL; skb_shinfo(skb)->gso_segs = __skb->gso_segs; + skb_shinfo(skb)->gso_size = __skb->gso_size; return 0; } diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index afee292fb004..162998e2f039 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -26,8 +26,8 @@ static size_t __get_vlan_tinfo_size(void) nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_VLAN_TUNNEL_FLAGS */ } -static bool vlan_tunid_inrange(struct net_bridge_vlan *v_curr, - struct net_bridge_vlan *v_last) +bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *v_last) { __be32 tunid_curr = tunnel_id_to_key32(v_curr->tinfo.tunnel_id); __be32 tunid_last = tunnel_id_to_key32(v_last->tinfo.tunnel_id); @@ -193,8 +193,8 @@ static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + [IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 }, }; -static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, - u16 vid, u32 tun_id, bool *changed) +int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd, + u16 vid, u32 tun_id, bool *changed) { int err = 0; @@ -250,8 +250,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr, return 0; } -int br_process_vlan_tunnel_info(struct net_bridge *br, - struct net_bridge_port *p, int cmd, +int br_process_vlan_tunnel_info(const struct net_bridge *br, + const struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, struct vtunnel_info *tinfo_last, bool *changed) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 5153ffe79a01..1f97703a52ff 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1199,8 +1199,8 @@ static inline void br_vlan_notify(const struct net_bridge *br, /* br_vlan_options.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING -bool br_vlan_opts_eq(const struct net_bridge_vlan *v1, - const struct net_bridge_vlan *v2); +bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end); bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v); size_t br_vlan_opts_nl_size(void); int br_vlan_process_options(const struct net_bridge *br, diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h index 2bdef2ea3420..c54cc26211d7 100644 --- a/net/bridge/br_private_tunnel.h +++ b/net/bridge/br_private_tunnel.h @@ -18,8 +18,8 @@ struct vtunnel_info { /* br_netlink_tunnel.c */ int br_parse_vlan_tunnel_info(struct nlattr *attr, struct vtunnel_info *tinfo); -int br_process_vlan_tunnel_info(struct net_bridge *br, - struct net_bridge_port *p, +int br_process_vlan_tunnel_info(const struct net_bridge *br, + const struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, struct vtunnel_info *tinfo_last, @@ -32,8 +32,9 @@ int br_fill_vlan_tunnel_info(struct sk_buff *skb, /* br_vlan_tunnel.c */ int vlan_tunnel_init(struct net_bridge_vlan_group *vg); void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg); -int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, u16 vid); -int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id); +int nbp_vlan_tunnel_info_delete(const struct net_bridge_port *port, u16 vid); +int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid, + u32 tun_id); void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port); void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, struct net_bridge_vlan *vlan); @@ -42,19 +43,23 @@ int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, struct net_bridge_vlan_group *vg); int br_handle_egress_vlan_tunnel(struct sk_buff *skb, struct net_bridge_vlan *vlan); +bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *v_last); +int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd, + u16 vid, u32 tun_id, bool *changed); #else static inline int vlan_tunnel_init(struct net_bridge_vlan_group *vg) { return 0; } -static inline int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, +static inline int nbp_vlan_tunnel_info_delete(const struct net_bridge_port *port, u16 vid) { return 0; } -static inline int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, +static inline int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid, u32 tun_id) { return 0; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 6b5deca08b89..f9092c71225f 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1569,10 +1569,41 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event) } } +static bool br_vlan_stats_fill(struct sk_buff *skb, + const struct net_bridge_vlan *v) +{ + struct br_vlan_stats stats; + struct nlattr *nest; + + nest = nla_nest_start(skb, BRIDGE_VLANDB_ENTRY_STATS); + if (!nest) + return false; + + br_vlan_get_stats(v, &stats); + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES, stats.rx_bytes, + BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_PACKETS, + stats.rx_packets, BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES, stats.tx_bytes, + BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_PACKETS, + stats.tx_packets, BRIDGE_VLANDB_STATS_PAD)) + goto out_err; + + nla_nest_end(skb, nest); + + return true; + +out_err: + nla_nest_cancel(skb, nest); + return false; +} + /* v_opts is used to dump the options which must be equal in the whole range */ static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range, const struct net_bridge_vlan *v_opts, - u16 flags) + u16 flags, + bool dump_stats) { struct bridge_vlan_info info; struct nlattr *nest; @@ -1596,8 +1627,13 @@ static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range, nla_put_u16(skb, BRIDGE_VLANDB_ENTRY_RANGE, vid_range)) goto out_err; - if (v_opts && !br_vlan_opts_fill(skb, v_opts)) - goto out_err; + if (v_opts) { + if (!br_vlan_opts_fill(skb, v_opts)) + goto out_err; + + if (dump_stats && !br_vlan_stats_fill(skb, v_opts)) + goto out_err; + } nla_nest_end(skb, nest); @@ -1675,7 +1711,7 @@ void br_vlan_notify(const struct net_bridge *br, goto out_kfree; } - if (!br_vlan_fill_vids(skb, vid, vid_range, v, flags)) + if (!br_vlan_fill_vids(skb, vid, vid_range, v, flags, false)) goto out_err; nlmsg_end(skb, nlh); @@ -1694,14 +1730,16 @@ bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, { return v_curr->vid - range_end->vid == 1 && range_end->flags == v_curr->flags && - br_vlan_opts_eq(v_curr, range_end); + br_vlan_opts_eq_range(v_curr, range_end); } static int br_vlan_dump_dev(const struct net_device *dev, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, + u32 dump_flags) { struct net_bridge_vlan *v, *range_start = NULL, *range_end = NULL; + bool dump_stats = !!(dump_flags & BRIDGE_VLANDB_DUMPF_STATS); struct net_bridge_vlan_group *vg; int idx = 0, s_idx = cb->args[1]; struct nlmsghdr *nlh = NULL; @@ -1754,12 +1792,13 @@ static int br_vlan_dump_dev(const struct net_device *dev, continue; } - if (v->vid == pvid || !br_vlan_can_enter_range(v, range_end)) { - u16 flags = br_vlan_flags(range_start, pvid); + if (dump_stats || v->vid == pvid || + !br_vlan_can_enter_range(v, range_end)) { + u16 vlan_flags = br_vlan_flags(range_start, pvid); if (!br_vlan_fill_vids(skb, range_start->vid, range_end->vid, range_start, - flags)) { + vlan_flags, dump_stats)) { err = -EMSGSIZE; break; } @@ -1778,7 +1817,8 @@ static int br_vlan_dump_dev(const struct net_device *dev, */ if (!err && range_start && !br_vlan_fill_vids(skb, range_start->vid, range_end->vid, - range_start, br_vlan_flags(range_start, pvid))) + range_start, br_vlan_flags(range_start, pvid), + dump_stats)) err = -EMSGSIZE; cb->args[1] = err ? idx : 0; @@ -1788,18 +1828,27 @@ static int br_vlan_dump_dev(const struct net_device *dev, return err; } +static const struct nla_policy br_vlan_db_dump_pol[BRIDGE_VLANDB_DUMP_MAX + 1] = { + [BRIDGE_VLANDB_DUMP_FLAGS] = { .type = NLA_U32 }, +}; + static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr *dtb[BRIDGE_VLANDB_DUMP_MAX + 1]; int idx = 0, err = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); struct br_vlan_msg *bvm; struct net_device *dev; + u32 dump_flags = 0; - err = nlmsg_parse(cb->nlh, sizeof(*bvm), NULL, 0, NULL, cb->extack); + err = nlmsg_parse(cb->nlh, sizeof(*bvm), dtb, BRIDGE_VLANDB_DUMP_MAX, + br_vlan_db_dump_pol, cb->extack); if (err < 0) return err; bvm = nlmsg_data(cb->nlh); + if (dtb[BRIDGE_VLANDB_DUMP_FLAGS]) + dump_flags = nla_get_u32(dtb[BRIDGE_VLANDB_DUMP_FLAGS]); rcu_read_lock(); if (bvm->ifindex) { @@ -1808,7 +1857,7 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) err = -ENODEV; goto out_err; } - err = br_vlan_dump_dev(dev, skb, cb); + err = br_vlan_dump_dev(dev, skb, cb, dump_flags); if (err && err != -EMSGSIZE) goto out_err; } else { @@ -1816,7 +1865,7 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto skip; - err = br_vlan_dump_dev(dev, skb, cb); + err = br_vlan_dump_dev(dev, skb, cb, dump_flags); if (err == -EMSGSIZE) break; skip: @@ -1839,6 +1888,7 @@ static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] = .len = sizeof(struct bridge_vlan_info) }, [BRIDGE_VLANDB_ENTRY_RANGE] = { .type = NLA_U16 }, [BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 }, + [BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED }, }; static int br_vlan_rtm_process_one(struct net_device *dev, diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index cd2eb194eb98..b4add9ea8964 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -4,25 +4,58 @@ #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/slab.h> +#include <net/ip_tunnels.h> #include "br_private.h" +#include "br_private_tunnel.h" -/* check if the options between two vlans are equal */ -bool br_vlan_opts_eq(const struct net_bridge_vlan *v1, - const struct net_bridge_vlan *v2) +static bool __vlan_tun_put(struct sk_buff *skb, const struct net_bridge_vlan *v) { - return v1->state == v2->state; + __be32 tid = tunnel_id_to_key32(v->tinfo.tunnel_id); + struct nlattr *nest; + + if (!v->tinfo.tunnel_dst) + return true; + + nest = nla_nest_start(skb, BRIDGE_VLANDB_ENTRY_TUNNEL_INFO); + if (!nest) + return false; + if (nla_put_u32(skb, BRIDGE_VLANDB_TINFO_ID, be32_to_cpu(tid))) { + nla_nest_cancel(skb, nest); + return false; + } + nla_nest_end(skb, nest); + + return true; +} + +static bool __vlan_tun_can_enter_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end) +{ + return (!v_curr->tinfo.tunnel_dst && !range_end->tinfo.tunnel_dst) || + vlan_tunid_inrange(v_curr, range_end); +} + +/* check if the options' state of v_curr allow it to enter the range */ +bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end) +{ + return v_curr->state == range_end->state && + __vlan_tun_can_enter_range(v_curr, range_end); } bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v) { return !nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, - br_vlan_get_state(v)); + br_vlan_get_state(v)) && + __vlan_tun_put(skb, v); } size_t br_vlan_opts_nl_size(void) { - return nla_total_size(sizeof(u8)); /* BRIDGE_VLANDB_ENTRY_STATE */ + return nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_STATE */ + + nla_total_size(0) /* BRIDGE_VLANDB_ENTRY_TUNNEL_INFO */ + + nla_total_size(sizeof(u32)); /* BRIDGE_VLANDB_TINFO_ID */ } static int br_vlan_modify_state(struct net_bridge_vlan_group *vg, @@ -62,6 +95,68 @@ static int br_vlan_modify_state(struct net_bridge_vlan_group *vg, return 0; } +static const struct nla_policy br_vlandb_tinfo_pol[BRIDGE_VLANDB_TINFO_MAX + 1] = { + [BRIDGE_VLANDB_TINFO_ID] = { .type = NLA_U32 }, + [BRIDGE_VLANDB_TINFO_CMD] = { .type = NLA_U32 }, +}; + +static int br_vlan_modify_tunnel(const struct net_bridge_port *p, + struct net_bridge_vlan *v, + struct nlattr **tb, + bool *changed, + struct netlink_ext_ack *extack) +{ + struct nlattr *tun_tb[BRIDGE_VLANDB_TINFO_MAX + 1], *attr; + struct bridge_vlan_info *vinfo; + u32 tun_id = 0; + int cmd, err; + + if (!p) { + NL_SET_ERR_MSG_MOD(extack, "Can't modify tunnel mapping of non-port vlans"); + return -EINVAL; + } + if (!(p->flags & BR_VLAN_TUNNEL)) { + NL_SET_ERR_MSG_MOD(extack, "Port doesn't have tunnel flag set"); + return -EINVAL; + } + + attr = tb[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO]; + err = nla_parse_nested(tun_tb, BRIDGE_VLANDB_TINFO_MAX, attr, + br_vlandb_tinfo_pol, extack); + if (err) + return err; + + if (!tun_tb[BRIDGE_VLANDB_TINFO_CMD]) { + NL_SET_ERR_MSG_MOD(extack, "Missing tunnel command attribute"); + return -ENOENT; + } + cmd = nla_get_u32(tun_tb[BRIDGE_VLANDB_TINFO_CMD]); + switch (cmd) { + case RTM_SETLINK: + if (!tun_tb[BRIDGE_VLANDB_TINFO_ID]) { + NL_SET_ERR_MSG_MOD(extack, "Missing tunnel id attribute"); + return -ENOENT; + } + /* when working on vlan ranges this is the starting tunnel id */ + tun_id = nla_get_u32(tun_tb[BRIDGE_VLANDB_TINFO_ID]); + /* vlan info attr is guaranteed by br_vlan_rtm_process_one */ + vinfo = nla_data(tb[BRIDGE_VLANDB_ENTRY_INFO]); + /* tunnel ids are mapped to each vlan in increasing order, + * the starting vlan is in BRIDGE_VLANDB_ENTRY_INFO and v is the + * current vlan, so we compute: tun_id + v - vinfo->vid + */ + tun_id += v->vid - vinfo->vid; + break; + case RTM_DELLINK: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel command"); + return -EINVAL; + } + + return br_vlan_tunnel_info(p, cmd, v->vid, tun_id, changed); +} + static int br_vlan_process_one_opts(const struct net_bridge *br, const struct net_bridge_port *p, struct net_bridge_vlan_group *vg, @@ -80,6 +175,11 @@ static int br_vlan_process_one_opts(const struct net_bridge *br, if (err) return err; } + if (tb[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO]) { + err = br_vlan_modify_tunnel(p, v, tb, changed, extack); + if (err) + return err; + } return 0; } diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index d13d2080f527..169e005fbda2 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -89,7 +89,8 @@ out: /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. */ -int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id) +int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid, + u32 tun_id) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *vlan; @@ -107,7 +108,7 @@ int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id) /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. */ -int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, u16 vid) +int nbp_vlan_tunnel_info_delete(const struct net_bridge_port *port, u16 vid) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index e1256e03a9a8..78db58c7aec2 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1561,7 +1561,7 @@ struct compat_ebt_entry_mwt { compat_uptr_t ptr; } u; compat_uint_t match_size; - compat_uint_t data[0] __attribute__ ((aligned (__alignof__(struct compat_ebt_replace)))); + compat_uint_t data[] __aligned(__alignof__(struct compat_ebt_replace)); }; /* account for possible padding between match_size and ->data */ diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 03c7cdd8e4cb..195d2d67be8a 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -112,7 +112,8 @@ static struct caif_device_entry *caif_get(struct net_device *dev) caif_device_list(dev_net(dev)); struct caif_device_entry *caifd; - list_for_each_entry_rcu(caifd, &caifdevs->list, list) { + list_for_each_entry_rcu(caifd, &caifdevs->list, list, + lockdep_rtnl_is_held()) { if (caifd->netdev == dev) return caifd; } diff --git a/net/core/dev.c b/net/core/dev.c index ccc03abeee52..021e18251465 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2875,7 +2875,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) netif_setup_tc(dev, txq); dev->real_num_tx_queues = txq; - dev_qdisc_set_real_num_tx_queues(dev); if (disabling) { synchronize_net(); @@ -4849,7 +4848,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { + switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list, + &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); @@ -9283,6 +9283,10 @@ int register_netdevice(struct net_device *dev) BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); BUG_ON(!net); + ret = ethtool_check_ops(dev->ethtool_ops); + if (ret) + return ret; + spin_lock_init(&dev->addr_list_lock); lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); diff --git a/net/core/devlink.c b/net/core/devlink.c index e8ccea9035c8..73bb8fbe3393 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3353,34 +3353,41 @@ devlink_param_value_get_from_info(const struct devlink_param *param, struct genl_info *info, union devlink_param_value *value) { + struct nlattr *param_data; int len; - if (param->type != DEVLINK_PARAM_TYPE_BOOL && - !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) + param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]; + + if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data) return -EINVAL; switch (param->type) { case DEVLINK_PARAM_TYPE_U8: - value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u8)) + return -EINVAL; + value->vu8 = nla_get_u8(param_data); break; case DEVLINK_PARAM_TYPE_U16: - value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u16)) + return -EINVAL; + value->vu16 = nla_get_u16(param_data); break; case DEVLINK_PARAM_TYPE_U32: - value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u32)) + return -EINVAL; + value->vu32 = nla_get_u32(param_data); break; case DEVLINK_PARAM_TYPE_STRING: - len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]), - nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); - if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) || + len = strnlen(nla_data(param_data), nla_len(param_data)); + if (len == nla_len(param_data) || len >= __DEVLINK_PARAM_MAX_STRING_VALUE) return -EINVAL; - strcpy(value->vstr, - nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); + strcpy(value->vstr, nla_data(param_data)); break; case DEVLINK_PARAM_TYPE_BOOL: - value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ? - true : false; + if (param_data && nla_len(param_data)) + return -EINVAL; + value->vbool = nla_get_flag(param_data); break; } return 0; @@ -5449,16 +5456,14 @@ struct devlink_stats { /** * struct devlink_trap_group_item - Packet trap group attributes. * @group: Immutable packet trap group attributes. - * @refcount: Number of trap items using the group. * @list: trap_group_list member. * @stats: Trap group statistics. * * Describes packet trap group attributes. Created by devlink during trap - * registration. + * group registration. */ struct devlink_trap_group_item { const struct devlink_trap_group *group; - refcount_t refcount; struct list_head list; struct devlink_stats __percpu *stats; }; @@ -5811,6 +5816,19 @@ devlink_trap_group_item_lookup(struct devlink *devlink, const char *name) } static struct devlink_trap_group_item * +devlink_trap_group_item_lookup_by_id(struct devlink *devlink, u16 id) +{ + struct devlink_trap_group_item *group_item; + + list_for_each_entry(group_item, &devlink->trap_group_list, list) { + if (group_item->group->id == id) + return group_item; + } + + return NULL; +} + +static struct devlink_trap_group_item * devlink_trap_group_item_get_from_info(struct devlink *devlink, struct genl_info *info) { @@ -5948,7 +5966,7 @@ __devlink_trap_group_action_set(struct devlink *devlink, int err; list_for_each_entry(trap_item, &devlink->trap_list, list) { - if (strcmp(trap_item->trap->group.name, group_name)) + if (strcmp(trap_item->group_item->group->name, group_name)) continue; err = __devlink_trap_action_set(devlink, trap_item, trap_action, extack); @@ -6033,6 +6051,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 }, + [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, @@ -7857,7 +7877,7 @@ static int devlink_trap_driver_verify(const struct devlink_trap *trap) static int devlink_trap_verify(const struct devlink_trap *trap) { - if (!trap || !trap->name || !trap->group.name) + if (!trap || !trap->name) return -EINVAL; if (trap->generic) @@ -7928,108 +7948,22 @@ devlink_trap_group_notify(struct devlink *devlink, msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } -static struct devlink_trap_group_item * -devlink_trap_group_item_create(struct devlink *devlink, - const struct devlink_trap_group *group) -{ - struct devlink_trap_group_item *group_item; - int err; - - err = devlink_trap_group_verify(group); - if (err) - return ERR_PTR(err); - - group_item = kzalloc(sizeof(*group_item), GFP_KERNEL); - if (!group_item) - return ERR_PTR(-ENOMEM); - - group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats); - if (!group_item->stats) { - err = -ENOMEM; - goto err_stats_alloc; - } - - group_item->group = group; - refcount_set(&group_item->refcount, 1); - - if (devlink->ops->trap_group_init) { - err = devlink->ops->trap_group_init(devlink, group); - if (err) - goto err_group_init; - } - - list_add_tail(&group_item->list, &devlink->trap_group_list); - devlink_trap_group_notify(devlink, group_item, - DEVLINK_CMD_TRAP_GROUP_NEW); - - return group_item; - -err_group_init: - free_percpu(group_item->stats); -err_stats_alloc: - kfree(group_item); - return ERR_PTR(err); -} - -static void -devlink_trap_group_item_destroy(struct devlink *devlink, - struct devlink_trap_group_item *group_item) -{ - devlink_trap_group_notify(devlink, group_item, - DEVLINK_CMD_TRAP_GROUP_DEL); - list_del(&group_item->list); - free_percpu(group_item->stats); - kfree(group_item); -} - -static struct devlink_trap_group_item * -devlink_trap_group_item_get(struct devlink *devlink, - const struct devlink_trap_group *group) -{ - struct devlink_trap_group_item *group_item; - - group_item = devlink_trap_group_item_lookup(devlink, group->name); - if (group_item) { - refcount_inc(&group_item->refcount); - return group_item; - } - - return devlink_trap_group_item_create(devlink, group); -} - -static void -devlink_trap_group_item_put(struct devlink *devlink, - struct devlink_trap_group_item *group_item) -{ - if (!refcount_dec_and_test(&group_item->refcount)) - return; - - devlink_trap_group_item_destroy(devlink, group_item); -} - static int devlink_trap_item_group_link(struct devlink *devlink, struct devlink_trap_item *trap_item) { + u16 group_id = trap_item->trap->init_group_id; struct devlink_trap_group_item *group_item; - group_item = devlink_trap_group_item_get(devlink, - &trap_item->trap->group); - if (IS_ERR(group_item)) - return PTR_ERR(group_item); + group_item = devlink_trap_group_item_lookup_by_id(devlink, group_id); + if (WARN_ON_ONCE(!group_item)) + return -EINVAL; trap_item->group_item = group_item; return 0; } -static void -devlink_trap_item_group_unlink(struct devlink *devlink, - struct devlink_trap_item *trap_item) -{ - devlink_trap_group_item_put(devlink, trap_item->group_item); -} - static void devlink_trap_notify(struct devlink *devlink, const struct devlink_trap_item *trap_item, enum devlink_command cmd) @@ -8092,7 +8026,6 @@ devlink_trap_register(struct devlink *devlink, return 0; err_trap_init: - devlink_trap_item_group_unlink(devlink, trap_item); err_group_link: free_percpu(trap_item->stats); err_stats_alloc: @@ -8113,7 +8046,6 @@ static void devlink_trap_unregister(struct devlink *devlink, list_del(&trap_item->list); if (devlink->ops->trap_fini) devlink->ops->trap_fini(devlink, trap, trap_item); - devlink_trap_item_group_unlink(devlink, trap_item); free_percpu(trap_item->stats); kfree(trap_item); } @@ -8269,6 +8201,122 @@ void *devlink_trap_ctx_priv(void *trap_ctx) } EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv); +static int +devlink_trap_group_register(struct devlink *devlink, + const struct devlink_trap_group *group) +{ + struct devlink_trap_group_item *group_item; + int err; + + if (devlink_trap_group_item_lookup(devlink, group->name)) + return -EEXIST; + + group_item = kzalloc(sizeof(*group_item), GFP_KERNEL); + if (!group_item) + return -ENOMEM; + + group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats); + if (!group_item->stats) { + err = -ENOMEM; + goto err_stats_alloc; + } + + group_item->group = group; + + if (devlink->ops->trap_group_init) { + err = devlink->ops->trap_group_init(devlink, group); + if (err) + goto err_group_init; + } + + list_add_tail(&group_item->list, &devlink->trap_group_list); + devlink_trap_group_notify(devlink, group_item, + DEVLINK_CMD_TRAP_GROUP_NEW); + + return 0; + +err_group_init: + free_percpu(group_item->stats); +err_stats_alloc: + kfree(group_item); + return err; +} + +static void +devlink_trap_group_unregister(struct devlink *devlink, + const struct devlink_trap_group *group) +{ + struct devlink_trap_group_item *group_item; + + group_item = devlink_trap_group_item_lookup(devlink, group->name); + if (WARN_ON_ONCE(!group_item)) + return; + + devlink_trap_group_notify(devlink, group_item, + DEVLINK_CMD_TRAP_GROUP_DEL); + list_del(&group_item->list); + free_percpu(group_item->stats); + kfree(group_item); +} + +/** + * devlink_trap_groups_register - Register packet trap groups with devlink. + * @devlink: devlink. + * @groups: Packet trap groups. + * @groups_count: Count of provided packet trap groups. + * + * Return: Non-zero value on failure. + */ +int devlink_trap_groups_register(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count) +{ + int i, err; + + mutex_lock(&devlink->lock); + for (i = 0; i < groups_count; i++) { + const struct devlink_trap_group *group = &groups[i]; + + err = devlink_trap_group_verify(group); + if (err) + goto err_trap_group_verify; + + err = devlink_trap_group_register(devlink, group); + if (err) + goto err_trap_group_register; + } + mutex_unlock(&devlink->lock); + + return 0; + +err_trap_group_register: +err_trap_group_verify: + for (i--; i >= 0; i--) + devlink_trap_group_unregister(devlink, &groups[i]); + mutex_unlock(&devlink->lock); + return err; +} +EXPORT_SYMBOL_GPL(devlink_trap_groups_register); + +/** + * devlink_trap_groups_unregister - Unregister packet trap groups from devlink. + * @devlink: devlink. + * @groups: Packet trap groups. + * @groups_count: Count of provided packet trap groups. + */ +void devlink_trap_groups_unregister(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count) +{ + int i; + + mutex_lock(&devlink->lock); + for (i = groups_count - 1; i >= 0; i--) + devlink_trap_group_unregister(devlink, &groups[i]); + mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister); + static void __devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { diff --git a/net/core/filter.c b/net/core/filter.c index 4a08c9fb2be7..96350a743539 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4061,7 +4061,8 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; - if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data))) + if (unlikely(!xdp || + xdp_size > (unsigned long)(xdp->data_end - xdp->data))) return -EFAULT; return bpf_event_output(map, flags, meta, meta_size, xdp->data, @@ -4079,6 +4080,19 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; +static int bpf_xdp_output_btf_ids[5]; +const struct bpf_func_proto bpf_xdp_output_proto = { + .func = bpf_xdp_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + .btf_id = bpf_xdp_output_btf_ids, +}; + BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) { return skb->sk ? sock_gen_cookie(skb->sk) : 0; @@ -7139,6 +7153,27 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si, + struct bpf_insn *insn) +{ + /* si->dst_reg = skb_shinfo(SKB); */ +#ifdef NET_SKBUFF_DATA_USES_OFFSET + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + BPF_REG_AX, si->src_reg, + offsetof(struct sk_buff, end)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, head)); + *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); +#else + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, end)); +#endif + + return insn; +} + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -7461,26 +7496,21 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct __sk_buff, gso_segs): - /* si->dst_reg = skb_shinfo(SKB); */ -#ifdef NET_SKBUFF_DATA_USES_OFFSET - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), - BPF_REG_AX, si->src_reg, - offsetof(struct sk_buff, end)); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), - si->dst_reg, si->src_reg, - offsetof(struct sk_buff, head)); - *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); -#else - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), - si->dst_reg, si->src_reg, - offsetof(struct sk_buff, end)); -#endif + insn = bpf_convert_shinfo_access(si, insn); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), si->dst_reg, si->dst_reg, bpf_target_off(struct skb_shared_info, gso_segs, 2, target_size)); break; + case offsetof(struct __sk_buff, gso_size): + insn = bpf_convert_shinfo_access(si, insn); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size), + si->dst_reg, si->dst_reg, + bpf_target_off(struct skb_shared_info, + gso_size, 2, + target_size)); + break; case offsetof(struct __sk_buff, wire_len): BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4); @@ -8829,10 +8859,9 @@ const struct bpf_prog_ops sk_reuseport_prog_ops = { }; #endif /* CONFIG_INET */ -DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp) +DEFINE_BPF_DISPATCHER(xdp) void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) { - bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp), - prev_prog, prog); + bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); } diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index d21348202ba6..7440e6117c81 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -188,6 +188,13 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie) } EXPORT_SYMBOL(flow_action_cookie_destroy); +void flow_rule_match_ct(const struct flow_rule *rule, + struct flow_match_ct *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CT, out); +} +EXPORT_SYMBOL(flow_rule_match_ct); + struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 0642f91c4038..b4c87fe31be2 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -53,30 +53,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) kfree(css_cls_state(css)); } +/* + * To avoid freezing of sockets creation for tasks with big number of threads + * and opened sockets lets release file_lock every 1000 iterated descriptors. + * New sockets will already have been created with new classid. + */ + +struct update_classid_context { + u32 classid; + unsigned int batch; +}; + +#define UPDATE_CLASSID_BATCH 1000 + static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; + struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file, &err); if (sock) { spin_lock(&cgroup_sk_update_lock); - sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, - (unsigned long)v); + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); spin_unlock(&cgroup_sk_update_lock); } + if (--ctx->batch == 0) { + ctx->batch = UPDATE_CLASSID_BATCH; + return n + 1; + } return 0; } +static void update_classid_task(struct task_struct *p, u32 classid) +{ + struct update_classid_context ctx = { + .classid = classid, + .batch = UPDATE_CLASSID_BATCH + }; + unsigned int fd = 0; + + do { + task_lock(p); + fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); + task_unlock(p); + cond_resched(); + } while (fd); +} + static void cgrp_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct task_struct *p; cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)css_cls_state(css)->classid); - task_unlock(p); + update_classid_task(p, css_cls_state(css)->classid); } } @@ -98,10 +128,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, css_task_iter_start(css, 0, &it); while ((p = css_task_iter_next(&it))) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)cs->classid); - task_unlock(p); + update_classid_task(p, cs->classid); cond_resched(); } css_task_iter_end(&it); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e1101a4f90a6..621b4479fee1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3926,14 +3926,21 @@ normal: goto perform_csum_check; if (!sg) { - if (!nskb->remcsum_offload) - nskb->ip_summed = CHECKSUM_NONE; - SKB_GSO_CB(nskb)->csum = - skb_copy_and_csum_bits(head_skb, offset, - skb_put(nskb, len), - len, 0); - SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + if (!csum) { + if (!nskb->remcsum_offload) + nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum = + skb_copy_and_csum_bits(head_skb, offset, + skb_put(nskb, + len), + len, 0); + SKB_GSO_CB(nskb)->csum_start = + skb_headroom(nskb) + doffset; + } else { + skb_copy_bits(head_skb, offset, + skb_put(nskb, len), + len); + } continue; } diff --git a/net/core/sock.c b/net/core/sock.c index e4af4dbc1c9e..0fc8937a7ff4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1832,7 +1832,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); - mem_cgroup_sk_alloc(newsk); + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 2e0f465295c3..a7075b3b4489 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -11,6 +11,7 @@ #include <linux/list.h> #include <linux/jhash.h> #include <linux/sock_diag.h> +#include <net/udp.h> struct bpf_stab { struct bpf_map map; @@ -141,12 +142,58 @@ static void sock_map_unref(struct sock *sk, void *link_raw) } } +static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock) +{ + struct proto *prot; + + sock_owned_by_me(sk); + + switch (sk->sk_type) { + case SOCK_STREAM: + prot = tcp_bpf_get_proto(sk, psock); + break; + + case SOCK_DGRAM: + prot = udp_bpf_get_proto(sk, psock); + break; + + default: + return -EINVAL; + } + + if (IS_ERR(prot)) + return PTR_ERR(prot); + + sk_psock_update_proto(sk, psock, prot); + return 0; +} + +static struct sk_psock *sock_map_psock_get_checked(struct sock *sk) +{ + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock(sk); + if (psock) { + if (sk->sk_prot->close != sock_map_close) { + psock = ERR_PTR(-EBUSY); + goto out; + } + + if (!refcount_inc_not_zero(&psock->refcnt)) + psock = ERR_PTR(-EBUSY); + } +out: + rcu_read_unlock(); + return psock; +} + static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, struct sock *sk) { struct bpf_prog *msg_parser, *skb_parser, *skb_verdict; - bool skb_progs, sk_psock_is_new = false; struct sk_psock *psock; + bool skb_progs; int ret; skb_verdict = READ_ONCE(progs->skb_verdict); @@ -172,7 +219,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, } } - psock = sk_psock_get_checked(sk); + psock = sock_map_psock_get_checked(sk); if (IS_ERR(psock)) { ret = PTR_ERR(psock); goto out_progs; @@ -191,18 +238,14 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, ret = -ENOMEM; goto out_progs; } - sk_psock_is_new = true; } if (msg_parser) psock_set_prog(&psock->progs.msg_parser, msg_parser); - if (sk_psock_is_new) { - ret = tcp_bpf_init(sk); - if (ret < 0) - goto out_drop; - } else { - tcp_bpf_reinit(sk); - } + + ret = sock_map_init_proto(sk, psock); + if (ret < 0) + goto out_drop; write_lock_bh(&sk->sk_callback_lock); if (skb_progs && !psock->parser.enabled) { @@ -235,20 +278,17 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk) struct sk_psock *psock; int ret; - psock = sk_psock_get_checked(sk); + psock = sock_map_psock_get_checked(sk); if (IS_ERR(psock)) return PTR_ERR(psock); - if (psock) { - tcp_bpf_reinit(sk); - return 0; + if (!psock) { + psock = sk_psock_init(sk, map->numa_node); + if (!psock) + return -ENOMEM; } - psock = sk_psock_init(sk, map->numa_node); - if (!psock) - return -ENOMEM; - - ret = tcp_bpf_init(sk); + ret = sock_map_init_proto(sk, psock); if (ret < 0) sk_psock_put(sk, psock); return ret; @@ -384,7 +424,6 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, struct sock *sk, u64 flags) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); - struct inet_connection_sock *icsk = inet_csk(sk); struct sk_psock_link *link; struct sk_psock *psock; struct sock *osk; @@ -395,7 +434,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, return -EINVAL; if (unlikely(idx >= map->max_entries)) return -E2BIG; - if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data))) + if (inet_csk_has_ulp(sk)) return -EINVAL; link = sk_psock_init_link(); @@ -448,15 +487,31 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops) ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB; } -static bool sock_map_sk_is_suitable(const struct sock *sk) +static bool sk_is_tcp(const struct sock *sk) { return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; } +static bool sk_is_udp(const struct sock *sk) +{ + return sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP; +} + +static bool sock_map_sk_is_suitable(const struct sock *sk) +{ + return sk_is_tcp(sk) || sk_is_udp(sk); +} + static bool sock_map_sk_state_allowed(const struct sock *sk) { - return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); + if (sk_is_tcp(sk)) + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); + else if (sk_is_udp(sk)) + return sk_hashed(sk); + + return false; } static int sock_map_update_elem(struct bpf_map *map, void *key, @@ -738,7 +793,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, struct sock *sk, u64 flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct inet_connection_sock *icsk = inet_csk(sk); u32 key_size = map->key_size, hash; struct bpf_htab_elem *elem, *elem_new; struct bpf_htab_bucket *bucket; @@ -749,7 +803,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, WARN_ON_ONCE(!rcu_read_lock_held()); if (unlikely(flags > BPF_EXIST)) return -EINVAL; - if (unlikely(icsk->icsk_ulp_data)) + if (inet_csk_has_ulp(sk)) return -EINVAL; link = sk_psock_init_link(); @@ -1129,7 +1183,7 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, return 0; } -void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link) +static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link) { switch (link->map->map_type) { case BPF_MAP_TYPE_SOCKMAP: @@ -1142,3 +1196,54 @@ void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link) break; } } + +static void sock_map_remove_links(struct sock *sk, struct sk_psock *psock) +{ + struct sk_psock_link *link; + + while ((link = sk_psock_link_pop(psock))) { + sock_map_unlink(sk, link); + sk_psock_free_link(link); + } +} + +void sock_map_unhash(struct sock *sk) +{ + void (*saved_unhash)(struct sock *sk); + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + if (sk->sk_prot->unhash) + sk->sk_prot->unhash(sk); + return; + } + + saved_unhash = psock->saved_unhash; + sock_map_remove_links(sk, psock); + rcu_read_unlock(); + saved_unhash(sk); +} + +void sock_map_close(struct sock *sk, long timeout) +{ + void (*saved_close)(struct sock *sk, long timeout); + struct sk_psock *psock; + + lock_sock(sk); + rcu_read_lock(); + psock = sk_psock(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + release_sock(sk); + return sk->sk_prot->close(sk, timeout); + } + + saved_close = psock->saved_close; + sock_map_remove_links(sk, psock); + rcu_read_unlock(); + release_sock(sk); + saved_close(sk, timeout); +} diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 08c3dc45f1a4..06b9983325cc 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1173,7 +1173,7 @@ make_route: if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, DST_HOST); + rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, 0); if (rt == NULL) goto e_nobufs; @@ -1439,7 +1439,7 @@ static int dn_route_input_slow(struct sk_buff *skb) } make_route: - rt = dst_alloc(&dn_dst_ops, out_dev, 1, DST_OBSOLETE_NONE, DST_HOST); + rt = dst_alloc(&dn_dst_ops, out_dev, 1, DST_OBSOLETE_NONE, 0); if (rt == NULL) goto e_nobufs; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index a7662e7a691d..760e6ea3178a 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -117,7 +117,9 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, /* port.c */ int dsa_port_set_state(struct dsa_port *dp, u8 state, struct switchdev_trans *trans); +int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); +void dsa_port_disable_rt(struct dsa_port *dp); void dsa_port_disable(struct dsa_port *dp); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); diff --git a/net/dsa/port.c b/net/dsa/port.c index d4450a454249..a18e65a474a5 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -63,7 +63,7 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state) pr_err("DSA: failed to set STP state %u (%d)\n", state, err); } -int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) +int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy) { struct dsa_switch *ds = dp->ds; int port = dp->index; @@ -78,14 +78,31 @@ int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) if (!dp->bridge_dev) dsa_port_set_state_now(dp, BR_STATE_FORWARDING); + if (dp->pl) + phylink_start(dp->pl); + return 0; } -void dsa_port_disable(struct dsa_port *dp) +int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) +{ + int err; + + rtnl_lock(); + err = dsa_port_enable_rt(dp, phy); + rtnl_unlock(); + + return err; +} + +void dsa_port_disable_rt(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; int port = dp->index; + if (dp->pl) + phylink_stop(dp->pl); + if (!dp->bridge_dev) dsa_port_set_state_now(dp, BR_STATE_DISABLED); @@ -93,6 +110,13 @@ void dsa_port_disable(struct dsa_port *dp) ds->ops->port_disable(ds, port); } +void dsa_port_disable(struct dsa_port *dp) +{ + rtnl_lock(); + dsa_port_disable_rt(dp); + rtnl_unlock(); +} + int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) { struct dsa_notifier_bridge_info info = { @@ -433,6 +457,7 @@ static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config, { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; + int err; /* Only called for inband modes */ if (!ds->ops->phylink_mac_link_state) { @@ -440,8 +465,12 @@ static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config, return; } - if (ds->ops->phylink_mac_link_state(ds, dp->index, state) < 0) + err = ds->ops->phylink_mac_link_state(ds, dp->index, state); + if (err < 0) { + dev_err(ds->dev, "p%d: phylink_mac_link_state() failed: %d\n", + dp->index, err); state->link = 0; + } } static void dsa_port_phylink_mac_config(struct phylink_config *config, @@ -617,10 +646,6 @@ static int dsa_port_phylink_register(struct dsa_port *dp) goto err_phy_connect; } - rtnl_lock(); - phylink_start(dp->pl); - rtnl_unlock(); - return 0; err_phy_connect: @@ -631,9 +656,14 @@ err_phy_connect: int dsa_port_link_register_of(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; + struct device_node *phy_np; - if (!ds->ops->adjust_link) - return dsa_port_phylink_register(dp); + if (!ds->ops->adjust_link) { + phy_np = of_parse_phandle(dp->dn, "phy-handle", 0); + if (of_phy_is_fixed_link(dp->dn) || phy_np) + return dsa_port_phylink_register(dp); + return 0; + } dev_warn(ds->dev, "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n"); @@ -648,11 +678,12 @@ void dsa_port_link_unregister_of(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; - if (!ds->ops->adjust_link) { + if (!ds->ops->adjust_link && dp->pl) { rtnl_lock(); phylink_disconnect_phy(dp->pl); rtnl_unlock(); phylink_destroy(dp->pl); + dp->pl = NULL; return; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fca9bfa8437e..5f782fa3029f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -88,12 +88,10 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - err = dsa_port_enable(dp, dev->phydev); + err = dsa_port_enable_rt(dp, dev->phydev); if (err) goto clear_promisc; - phylink_start(dp->pl); - return 0; clear_promisc: @@ -114,9 +112,7 @@ static int dsa_slave_close(struct net_device *dev) struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); - phylink_stop(dp->pl); - - dsa_port_disable(dp); + dsa_port_disable_rt(dp); dev_mc_unsync(master, dev); dev_uc_unsync(master, dev); @@ -865,8 +861,8 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, if (!flow_offload_has_one_action(&cls->rule->action)) return err; - if (!flow_action_basic_hw_stats_types_check(&cls->rule->action, - cls->common.extack)) + if (!flow_action_basic_hw_stats_check(&cls->rule->action, + cls->common.extack)) return err; act = &cls->rule->action.entries[0]; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 9c3114179690..0d3f796d14a3 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -142,6 +142,27 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, return skb; } + +static int brcm_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + /* We have been called on the DSA master network device after + * eth_type_trans() which pulled the Ethernet header already. + * Frames have one of these two layouts: + * ----------------------------------- + * | MAC DA | MAC SA | 4b tag | Type | DSA_TAG_PROTO_BRCM + * ----------------------------------- + * ----------------------------------- + * | 4b tag | MAC DA | MAC SA | Type | DSA_TAG_PROTO_BRCM_PREPEND + * ----------------------------------- + * skb->data points 2 bytes before the actual Ethernet type field and + * we have an offset of 4bytes between where skb->data and where the + * payload starts. + */ + *offset = BRCM_TAG_LEN; + *proto = ((__be16 *)skb->data)[1]; + return 0; +} #endif #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM) @@ -177,6 +198,7 @@ static const struct dsa_device_ops brcm_netdev_ops = { .xmit = brcm_tag_xmit, .rcv = brcm_tag_rcv, .overhead = BRCM_TAG_LEN, + .flow_dissect = brcm_tag_flow_dissect, }; DSA_TAG_DRIVER(brcm_netdev_ops); @@ -205,6 +227,7 @@ static const struct dsa_device_ops brcm_prepend_netdev_ops = { .xmit = brcm_tag_xmit_prepend, .rcv = brcm_tag_rcv_prepend, .overhead = BRCM_TAG_LEN, + .flow_dissect = brcm_tag_flow_dissect, }; DSA_TAG_DRIVER(brcm_prepend_netdev_ops); diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 424545a4aaec..b0bd3decad02 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -5,4 +5,5 @@ obj-y += ioctl.o common.o obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ - linkstate.o debug.o wol.o + linkstate.o debug.o wol.o features.o privflags.o rings.o \ + channels.o diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index ef9197541cb3..dae7402eaca3 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -588,6 +588,100 @@ int ethnl_update_bitset32(u32 *bitmap, unsigned int nbits, return 0; } +/** + * ethnl_parse_bitset() - Compute effective value and mask from bitset nest + * @val: unsigned long based bitmap to put value into + * @mask: unsigned long based bitmap to put mask into + * @nbits: size of @val and @mask bitmaps + * @attr: nest attribute to parse and apply + * @names: array of bit names; may be null for compact format + * @extack: extack for error reporting + * + * Provide @nbits size long bitmaps for value and mask so that + * x = (val & mask) | (x & ~mask) would modify any @nbits sized bitmap x + * the same way ethnl_update_bitset() with the same bitset attribute would. + * + * Return: negative error code on failure, 0 on success + */ +int ethnl_parse_bitset(unsigned long *val, unsigned long *mask, + unsigned int nbits, const struct nlattr *attr, + ethnl_string_array_t names, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[ETHTOOL_A_BITSET_MAX + 1]; + const struct nlattr *bit_attr; + bool no_mask; + int rem; + int ret; + + if (!attr) + return 0; + ret = nla_parse_nested(tb, ETHTOOL_A_BITSET_MAX, attr, bitset_policy, + extack); + if (ret < 0) + return ret; + no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; + + if (!tb[ETHTOOL_A_BITSET_BITS]) { + unsigned int change_bits; + + ret = ethnl_compact_sanity_checks(nbits, attr, tb, extack); + if (ret < 0) + return ret; + + change_bits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]); + bitmap_from_arr32(val, nla_data(tb[ETHTOOL_A_BITSET_VALUE]), + change_bits); + if (change_bits < nbits) + bitmap_clear(val, change_bits, nbits - change_bits); + if (no_mask) { + bitmap_fill(mask, nbits); + } else { + bitmap_from_arr32(mask, + nla_data(tb[ETHTOOL_A_BITSET_MASK]), + change_bits); + if (change_bits < nbits) + bitmap_clear(mask, change_bits, + nbits - change_bits); + } + + return 0; + } + + if (tb[ETHTOOL_A_BITSET_VALUE]) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE], + "value only allowed in compact bitset"); + return -EINVAL; + } + if (tb[ETHTOOL_A_BITSET_MASK]) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK], + "mask only allowed in compact bitset"); + return -EINVAL; + } + + bitmap_zero(val, nbits); + if (no_mask) + bitmap_fill(mask, nbits); + else + bitmap_zero(mask, nbits); + + nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { + unsigned int idx; + bool bit_val; + + ret = ethnl_parse_bit(&idx, &bit_val, nbits, bit_attr, no_mask, + names, extack); + if (ret < 0) + return ret; + if (bit_val) + __set_bit(idx, val); + if (!no_mask) + __set_bit(idx, mask); + } + + return 0; +} + #if BITS_PER_LONG == 64 && defined(__BIG_ENDIAN) /* 64-bit big endian architectures are the only case when u32 based bitmaps diff --git a/net/ethtool/bitset.h b/net/ethtool/bitset.h index b849f9d19676..c2c2e0051d00 100644 --- a/net/ethtool/bitset.h +++ b/net/ethtool/bitset.h @@ -26,5 +26,9 @@ int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits, int ethnl_update_bitset32(u32 *bitmap, unsigned int nbits, const struct nlattr *attr, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod); +int ethnl_parse_bitset(unsigned long *val, unsigned long *mask, + unsigned int nbits, const struct nlattr *attr, + ethnl_string_array_t names, + struct netlink_ext_ack *extack); #endif /* _NET_ETHTOOL_BITSET_H */ diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c new file mode 100644 index 000000000000..389924b65d05 --- /dev/null +++ b/net/ethtool/channels.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <net/xdp_sock.h> + +#include "netlink.h" +#include "common.h" + +struct channels_req_info { + struct ethnl_req_info base; +}; + +struct channels_reply_data { + struct ethnl_reply_data base; + struct ethtool_channels channels; +}; + +#define CHANNELS_REPDATA(__reply_base) \ + container_of(__reply_base, struct channels_reply_data, base) + +static const struct nla_policy +channels_get_policy[ETHTOOL_A_CHANNELS_MAX + 1] = { + [ETHTOOL_A_CHANNELS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_CHANNELS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_OTHER_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_COMBINED_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_RX_COUNT] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_TX_COUNT] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_OTHER_COUNT] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_COMBINED_COUNT] = { .type = NLA_REJECT }, +}; + +static int channels_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct channels_reply_data *data = CHANNELS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + int ret; + + if (!dev->ethtool_ops->get_channels) + return -EOPNOTSUPP; + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + dev->ethtool_ops->get_channels(dev, &data->channels); + ethnl_ops_complete(dev); + + return 0; +} + +static int channels_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + return nla_total_size(sizeof(u32)) + /* _CHANNELS_RX_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_TX_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_OTHER_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_COMBINED_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_RX_COUNT */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_TX_COUNT */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_OTHER_COUNT */ + nla_total_size(sizeof(u32)); /* _CHANNELS_COMBINED_COUNT */ +} + +static int channels_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct channels_reply_data *data = CHANNELS_REPDATA(reply_base); + const struct ethtool_channels *channels = &data->channels; + + if ((channels->max_rx && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_RX_MAX, + channels->max_rx) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_RX_COUNT, + channels->rx_count))) || + (channels->max_tx && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_TX_MAX, + channels->max_tx) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_TX_COUNT, + channels->tx_count))) || + (channels->max_other && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_OTHER_MAX, + channels->max_other) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_OTHER_COUNT, + channels->other_count))) || + (channels->max_combined && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_COMBINED_MAX, + channels->max_combined) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_COMBINED_COUNT, + channels->combined_count)))) + return -EMSGSIZE; + + return 0; +} + +const struct ethnl_request_ops ethnl_channels_request_ops = { + .request_cmd = ETHTOOL_MSG_CHANNELS_GET, + .reply_cmd = ETHTOOL_MSG_CHANNELS_GET_REPLY, + .hdr_attr = ETHTOOL_A_CHANNELS_HEADER, + .max_attr = ETHTOOL_A_CHANNELS_MAX, + .req_info_size = sizeof(struct channels_req_info), + .reply_data_size = sizeof(struct channels_reply_data), + .request_policy = channels_get_policy, + + .prepare_data = channels_prepare_data, + .reply_size = channels_reply_size, + .fill_reply = channels_fill_reply, +}; + +/* CHANNELS_SET */ + +static const struct nla_policy +channels_set_policy[ETHTOOL_A_CHANNELS_MAX + 1] = { + [ETHTOOL_A_CHANNELS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_CHANNELS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_OTHER_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_COMBINED_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_RX_COUNT] = { .type = NLA_U32 }, + [ETHTOOL_A_CHANNELS_TX_COUNT] = { .type = NLA_U32 }, + [ETHTOOL_A_CHANNELS_OTHER_COUNT] = { .type = NLA_U32 }, + [ETHTOOL_A_CHANNELS_COMBINED_COUNT] = { .type = NLA_U32 }, +}; + +int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[ETHTOOL_A_CHANNELS_MAX + 1]; + unsigned int from_channel, old_total, i; + struct ethtool_channels channels = {}; + struct ethnl_req_info req_info = {}; + const struct nlattr *err_attr; + const struct ethtool_ops *ops; + struct net_device *dev; + u32 max_rx_in_use = 0; + bool mod = false; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_CHANNELS_MAX, channels_set_policy, + info->extack); + if (ret < 0) + return ret; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_CHANNELS_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + ops = dev->ethtool_ops; + ret = -EOPNOTSUPP; + if (!ops->get_channels || !ops->set_channels) + goto out_dev; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + ops->get_channels(dev, &channels); + old_total = channels.combined_count + + max(channels.rx_count, channels.tx_count); + + ethnl_update_u32(&channels.rx_count, tb[ETHTOOL_A_CHANNELS_RX_COUNT], + &mod); + ethnl_update_u32(&channels.tx_count, tb[ETHTOOL_A_CHANNELS_TX_COUNT], + &mod); + ethnl_update_u32(&channels.other_count, + tb[ETHTOOL_A_CHANNELS_OTHER_COUNT], &mod); + ethnl_update_u32(&channels.combined_count, + tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT], &mod); + ret = 0; + if (!mod) + goto out_ops; + + /* ensure new channel counts are within limits */ + if (channels.rx_count > channels.max_rx) + err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT]; + else if (channels.tx_count > channels.max_tx) + err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT]; + else if (channels.other_count > channels.max_other) + err_attr = tb[ETHTOOL_A_CHANNELS_OTHER_COUNT]; + else if (channels.combined_count > channels.max_combined) + err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT]; + else + err_attr = NULL; + if (err_attr) { + ret = -EINVAL; + NL_SET_ERR_MSG_ATTR(info->extack, err_attr, + "requested channel count exceeds maximum"); + goto out_ops; + } + + /* ensure the new Rx count fits within the configured Rx flow + * indirection table settings + */ + if (netif_is_rxfh_configured(dev) && + !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) && + (channels.combined_count + channels.rx_count) <= max_rx_in_use) { + GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing indirection table settings"); + return -EINVAL; + } + + /* Disabling channels, query zero-copy AF_XDP sockets */ + from_channel = channels.combined_count + + min(channels.rx_count, channels.tx_count); + for (i = from_channel; i < old_total; i++) + if (xdp_get_umem_from_qid(dev, i)) { + GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets"); + return -EINVAL; + } + + ret = dev->ethtool_ops->set_channels(dev, &channels); + if (ret < 0) + goto out_ops; + ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF, NULL); + +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); +out_dev: + dev_put(dev); + return ret; +} diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 7b6969af5ae7..dab047eec943 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -258,3 +258,45 @@ int __ethtool_get_link(struct net_device *dev) return netif_running(dev) && dev->ethtool_ops->get_link(dev); } + +int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) +{ + u32 dev_size, current_max = 0; + u32 *indir; + int ret; + + if (!dev->ethtool_ops->get_rxfh_indir_size || + !dev->ethtool_ops->get_rxfh) + return -EOPNOTSUPP; + dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + if (dev_size == 0) + return -EOPNOTSUPP; + + indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); + if (!indir) + return -ENOMEM; + + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); + if (ret) + goto out; + + while (dev_size--) + current_max = max(current_max, indir[dev_size]); + + *max = current_max; + +out: + kfree(indir); + return ret; +} + +int ethtool_check_ops(const struct ethtool_ops *ops) +{ + if (WARN_ON(ops->set_coalesce && !ops->supported_coalesce_params)) + return -EINVAL; + /* NOTE: sufficiently insane drivers may swap ethtool_ops at runtime, + * the fact that ops are checked at registration time does not + * mean the ops attached to a netdev later on are sane. + */ + return 0; +} diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 40ba74e0b9bb..03946e16e623 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -6,6 +6,8 @@ #include <linux/netdevice.h> #include <linux/ethtool.h> +#define ETHTOOL_DEV_FEATURE_WORDS DIV_ROUND_UP(NETDEV_FEATURE_COUNT, 32) + /* compose link mode index from speed, type and duplex */ #define ETHTOOL_LINK_MODE(speed, type, duplex) \ ETHTOOL_LINK_MODE_ ## speed ## base ## type ## _ ## duplex ## _BIT @@ -27,5 +29,6 @@ int __ethtool_get_link(struct net_device *dev); bool convert_legacy_settings_to_link_ksettings( struct ethtool_link_ksettings *link_ksettings, const struct ethtool_cmd *legacy_settings); +int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max); #endif /* _ETHTOOL_COMMON_H */ diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c index aaef4843e6ba..87f288ee20c8 100644 --- a/net/ethtool/debug.c +++ b/net/ethtool/debug.c @@ -102,8 +102,10 @@ int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info) info->extack); if (ret < 0) return ret; - ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_DEBUG_HEADER], - genl_info_net(info), info->extack, true); + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_DEBUG_HEADER], + genl_info_net(info), info->extack, + true); if (ret < 0) return ret; dev = req_info.dev; diff --git a/net/ethtool/features.c b/net/ethtool/features.c new file mode 100644 index 000000000000..4e632dc987d8 --- /dev/null +++ b/net/ethtool/features.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" +#include "bitset.h" + +struct features_req_info { + struct ethnl_req_info base; +}; + +struct features_reply_data { + struct ethnl_reply_data base; + u32 hw[ETHTOOL_DEV_FEATURE_WORDS]; + u32 wanted[ETHTOOL_DEV_FEATURE_WORDS]; + u32 active[ETHTOOL_DEV_FEATURE_WORDS]; + u32 nochange[ETHTOOL_DEV_FEATURE_WORDS]; + u32 all[ETHTOOL_DEV_FEATURE_WORDS]; +}; + +#define FEATURES_REPDATA(__reply_base) \ + container_of(__reply_base, struct features_reply_data, base) + +static const struct nla_policy +features_get_policy[ETHTOOL_A_FEATURES_MAX + 1] = { + [ETHTOOL_A_FEATURES_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_FEATURES_HW] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_WANTED] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_ACTIVE] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_NOCHANGE] = { .type = NLA_REJECT }, +}; + +static void ethnl_features_to_bitmap32(u32 *dest, netdev_features_t src) +{ + unsigned int i; + + for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; i++) + dest[i] = src >> (32 * i); +} + +static int features_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct features_reply_data *data = FEATURES_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + netdev_features_t all_features; + + ethnl_features_to_bitmap32(data->hw, dev->hw_features); + ethnl_features_to_bitmap32(data->wanted, dev->wanted_features); + ethnl_features_to_bitmap32(data->active, dev->features); + ethnl_features_to_bitmap32(data->nochange, NETIF_F_NEVER_CHANGE); + all_features = GENMASK_ULL(NETDEV_FEATURE_COUNT - 1, 0); + ethnl_features_to_bitmap32(data->all, all_features); + + return 0; +} + +static int features_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct features_reply_data *data = FEATURES_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + unsigned int len = 0; + int ret; + + ret = ethnl_bitset32_size(data->hw, data->all, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + ret = ethnl_bitset32_size(data->wanted, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + ret = ethnl_bitset32_size(data->active, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + ret = ethnl_bitset32_size(data->nochange, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + + return len; +} + +static int features_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct features_reply_data *data = FEATURES_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + int ret; + + ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_HW, data->hw, + data->all, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_WANTED, data->wanted, + NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_ACTIVE, data->active, + NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + return ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_NOCHANGE, + data->nochange, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); +} + +const struct ethnl_request_ops ethnl_features_request_ops = { + .request_cmd = ETHTOOL_MSG_FEATURES_GET, + .reply_cmd = ETHTOOL_MSG_FEATURES_GET_REPLY, + .hdr_attr = ETHTOOL_A_FEATURES_HEADER, + .max_attr = ETHTOOL_A_FEATURES_MAX, + .req_info_size = sizeof(struct features_req_info), + .reply_data_size = sizeof(struct features_reply_data), + .request_policy = features_get_policy, + + .prepare_data = features_prepare_data, + .reply_size = features_reply_size, + .fill_reply = features_fill_reply, +}; + +/* FEATURES_SET */ + +static const struct nla_policy +features_set_policy[ETHTOOL_A_FEATURES_MAX + 1] = { + [ETHTOOL_A_FEATURES_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_FEATURES_HW] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_WANTED] = { .type = NLA_NESTED }, + [ETHTOOL_A_FEATURES_ACTIVE] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_NOCHANGE] = { .type = NLA_REJECT }, +}; + +static void ethnl_features_to_bitmap(unsigned long *dest, netdev_features_t val) +{ + const unsigned int words = BITS_TO_LONGS(NETDEV_FEATURE_COUNT); + unsigned int i; + + bitmap_zero(dest, NETDEV_FEATURE_COUNT); + for (i = 0; i < words; i++) + dest[i] = (unsigned long)(val >> (i * BITS_PER_LONG)); +} + +static netdev_features_t ethnl_bitmap_to_features(unsigned long *src) +{ + const unsigned int nft_bits = sizeof(netdev_features_t) * BITS_PER_BYTE; + const unsigned int words = BITS_TO_LONGS(NETDEV_FEATURE_COUNT); + netdev_features_t ret = 0; + unsigned int i; + + for (i = 0; i < words; i++) + ret |= (netdev_features_t)(src[i]) << (i * BITS_PER_LONG); + ret &= ~(netdev_features_t)0 >> (nft_bits - NETDEV_FEATURE_COUNT); + return ret; +} + +static int features_send_reply(struct net_device *dev, struct genl_info *info, + const unsigned long *wanted, + const unsigned long *wanted_mask, + const unsigned long *active, + const unsigned long *active_mask, bool compact) +{ + struct sk_buff *rskb; + void *reply_payload; + int reply_len = 0; + int ret; + + reply_len = ethnl_reply_header_size(); + ret = ethnl_bitset_size(wanted, wanted_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto err; + reply_len += ret; + ret = ethnl_bitset_size(active, active_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto err; + reply_len += ret; + + ret = -ENOMEM; + rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_FEATURES_SET_REPLY, + ETHTOOL_A_FEATURES_HEADER, info, + &reply_payload); + if (!rskb) + goto err; + + ret = ethnl_put_bitset(rskb, ETHTOOL_A_FEATURES_WANTED, wanted, + wanted_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto nla_put_failure; + ret = ethnl_put_bitset(rskb, ETHTOOL_A_FEATURES_ACTIVE, active, + active_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto nla_put_failure; + + genlmsg_end(rskb, reply_payload); + ret = genlmsg_reply(rskb, info); + return ret; + +nla_put_failure: + nlmsg_free(rskb); + WARN_ONCE(1, "calculated message payload length (%d) not sufficient\n", + reply_len); +err: + GENL_SET_ERR_MSG(info, "failed to send reply message"); + return ret; +} + +int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) +{ + DECLARE_BITMAP(wanted_diff_mask, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(active_diff_mask, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(old_active, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(new_active, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(req_wanted, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(req_mask, NETDEV_FEATURE_COUNT); + struct nlattr *tb[ETHTOOL_A_FEATURES_MAX + 1]; + struct ethnl_req_info req_info = {}; + struct net_device *dev; + bool mod; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_FEATURES_MAX, features_set_policy, + info->extack); + if (ret < 0) + return ret; + if (!tb[ETHTOOL_A_FEATURES_WANTED]) + return -EINVAL; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_FEATURES_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + + rtnl_lock(); + ethnl_features_to_bitmap(old_active, dev->features); + ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT, + tb[ETHTOOL_A_FEATURES_WANTED], + netdev_features_strings, info->extack); + if (ret < 0) + goto out_rtnl; + if (ethnl_bitmap_to_features(req_mask) & ~NETIF_F_ETHTOOL_BITS) { + GENL_SET_ERR_MSG(info, "attempt to change non-ethtool features"); + ret = -EINVAL; + goto out_rtnl; + } + + /* set req_wanted bits not in req_mask from old_active */ + bitmap_and(req_wanted, req_wanted, req_mask, NETDEV_FEATURE_COUNT); + bitmap_andnot(new_active, old_active, req_mask, NETDEV_FEATURE_COUNT); + bitmap_or(req_wanted, new_active, req_wanted, NETDEV_FEATURE_COUNT); + if (bitmap_equal(req_wanted, old_active, NETDEV_FEATURE_COUNT)) { + ret = 0; + goto out_rtnl; + } + + dev->wanted_features = ethnl_bitmap_to_features(req_wanted); + __netdev_update_features(dev); + ethnl_features_to_bitmap(new_active, dev->features); + mod = !bitmap_equal(old_active, new_active, NETDEV_FEATURE_COUNT); + + ret = 0; + if (!(req_info.flags & ETHTOOL_FLAG_OMIT_REPLY)) { + bool compact = req_info.flags & ETHTOOL_FLAG_COMPACT_BITSETS; + + bitmap_xor(wanted_diff_mask, req_wanted, new_active, + NETDEV_FEATURE_COUNT); + bitmap_xor(active_diff_mask, old_active, new_active, + NETDEV_FEATURE_COUNT); + bitmap_and(wanted_diff_mask, wanted_diff_mask, req_mask, + NETDEV_FEATURE_COUNT); + bitmap_and(req_wanted, req_wanted, wanted_diff_mask, + NETDEV_FEATURE_COUNT); + bitmap_and(new_active, new_active, active_diff_mask, + NETDEV_FEATURE_COUNT); + + ret = features_send_reply(dev, info, req_wanted, + wanted_diff_mask, new_active, + active_diff_mask, compact); + } + if (mod) + ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL); + +out_rtnl: + rtnl_unlock(); + dev_put(dev); + return ret; +} diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b2684ffa26de..3852a58d7f95 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -56,8 +56,6 @@ EXPORT_SYMBOL(ethtool_op_get_ts_info); /* Handlers for each ethtool command */ -#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32) - static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { struct ethtool_gfeatures cmd = { @@ -198,13 +196,14 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) switch (eth_cmd) { case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: - return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC; + return NETIF_F_CSUM_MASK | NETIF_F_FCOE_CRC_BIT | + NETIF_F_SCTP_CRC; case ETHTOOL_GRXCSUM: case ETHTOOL_SRXCSUM: return NETIF_F_RXCSUM; case ETHTOOL_GSG: case ETHTOOL_SSG: - return NETIF_F_SG; + return NETIF_F_SG | NETIF_F_FRAGLIST; case ETHTOOL_GTSO: case ETHTOOL_STSO: return NETIF_F_ALL_TSO; @@ -930,37 +929,6 @@ void netdev_rss_key_fill(void *buffer, size_t len) } EXPORT_SYMBOL(netdev_rss_key_fill); -static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) -{ - u32 dev_size, current_max = 0; - u32 *indir; - int ret; - - if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->get_rxfh) - return -EOPNOTSUPP; - dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); - if (dev_size == 0) - return -EOPNOTSUPP; - - indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); - if (!indir) - return -ENOMEM; - - ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); - if (ret) - goto out; - - while (dev_size--) - current_max = max(current_max, indir[dev_size]); - - *max = current_max; - -out: - kfree(indir); - return ret; -} - static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { @@ -1551,9 +1519,6 @@ ethtool_set_coalesce_supported(struct net_device *dev, u32 supported_params = dev->ethtool_ops->supported_coalesce_params; u32 nonzero_params = 0; - if (!supported_params) - return true; - if (coalesce->rx_coalesce_usecs) nonzero_params |= ETHTOOL_COALESCE_RX_USECS; if (coalesce->rx_max_coalesced_frames) @@ -1636,6 +1601,7 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) { struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM }; + int ret; if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam) return -EOPNOTSUPP; @@ -1652,7 +1618,10 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) ringparam.tx_pending > max.tx_max_pending) return -EINVAL; - return dev->ethtool_ops->set_ringparam(dev, &ringparam); + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam); + if (!ret) + ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL); + return ret; } static noinline_for_stack int ethtool_get_channels(struct net_device *dev, @@ -1677,6 +1646,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, u16 from_channel, to_channel; u32 max_rx_in_use = 0; unsigned int i; + int ret; if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels) return -EOPNOTSUPP; @@ -1708,7 +1678,10 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, if (xdp_get_umem_from_qid(dev, i)) return -EINVAL; - return dev->ethtool_ops->set_channels(dev, &channels); + ret = dev->ethtool_ops->set_channels(dev, &channels); + if (!ret) + ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF, NULL); + return ret; } static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) @@ -2717,6 +2690,8 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, dev->ethtool_ops->get_priv_flags); + if (!rc) + ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF, NULL); break; case ETHTOOL_SPFLAGS: rc = ethtool_set_value(dev, useraddr, diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c index 5d16cb4e8693..2df420068cbb 100644 --- a/net/ethtool/linkinfo.c +++ b/net/ethtool/linkinfo.c @@ -121,8 +121,10 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info) info->extack); if (ret < 0) return ret; - ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_LINKINFO_HEADER], - genl_info_net(info), info->extack, true); + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_LINKINFO_HEADER], + genl_info_net(info), info->extack, + true); if (ret < 0) return ret; dev = req_info.dev; diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index f049b97072fe..cb29cc8c5960 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -334,8 +334,10 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info) info->extack); if (ret < 0) return ret; - ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_LINKMODES_HEADER], - genl_info_net(info), info->extack, true); + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_LINKMODES_HEADER], + genl_info_net(info), info->extack, + true); if (ret < 0) return ret; dev = req_info.dev; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 180c194fab07..55c8ce4019d9 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -18,7 +18,7 @@ static const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_MAX + 1] = { }; /** - * ethnl_parse_header() - parse request header + * ethnl_parse_header_dev_get() - parse request header * @req_info: structure to put results into * @header: nest attribute with request header * @net: request netns @@ -33,9 +33,9 @@ static const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_MAX + 1] = { * * Return: 0 on success or negative error code */ -int ethnl_parse_header(struct ethnl_req_info *req_info, - const struct nlattr *header, struct net *net, - struct netlink_ext_ack *extack, bool require_dev) +int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, + const struct nlattr *header, struct net *net, + struct netlink_ext_ack *extack, bool require_dev) { struct nlattr *tb[ETHTOOL_A_HEADER_MAX + 1]; const struct nlattr *devname_attr; @@ -215,6 +215,10 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_LINKSTATE_GET] = ðnl_linkstate_request_ops, [ETHTOOL_MSG_DEBUG_GET] = ðnl_debug_request_ops, [ETHTOOL_MSG_WOL_GET] = ðnl_wol_request_ops, + [ETHTOOL_MSG_FEATURES_GET] = ðnl_features_request_ops, + [ETHTOOL_MSG_PRIVFLAGS_GET] = ðnl_privflags_request_ops, + [ETHTOOL_MSG_RINGS_GET] = ðnl_rings_request_ops, + [ETHTOOL_MSG_CHANNELS_GET] = ðnl_channels_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -253,8 +257,8 @@ static int ethnl_default_parse(struct ethnl_req_info *req_info, request_ops->request_policy, extack); if (ret < 0) goto out; - ret = ethnl_parse_header(req_info, tb[request_ops->hdr_attr], net, - extack, require_dev); + ret = ethnl_parse_header_dev_get(req_info, tb[request_ops->hdr_attr], + net, extack, require_dev); if (ret < 0) goto out; @@ -527,6 +531,10 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_LINKMODES_NTF] = ðnl_linkmodes_request_ops, [ETHTOOL_MSG_DEBUG_NTF] = ðnl_debug_request_ops, [ETHTOOL_MSG_WOL_NTF] = ðnl_wol_request_ops, + [ETHTOOL_MSG_FEATURES_NTF] = ðnl_features_request_ops, + [ETHTOOL_MSG_PRIVFLAGS_NTF] = ðnl_privflags_request_ops, + [ETHTOOL_MSG_RINGS_NTF] = ðnl_rings_request_ops, + [ETHTOOL_MSG_CHANNELS_NTF] = ðnl_channels_request_ops, }; /* default notification handler */ @@ -612,6 +620,10 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_LINKMODES_NTF] = ethnl_default_notify, [ETHTOOL_MSG_DEBUG_NTF] = ethnl_default_notify, [ETHTOOL_MSG_WOL_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_FEATURES_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_PRIVFLAGS_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_RINGS_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_CHANNELS_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) @@ -629,6 +641,29 @@ void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) } EXPORT_SYMBOL(ethtool_notify); +static void ethnl_notify_features(struct netdev_notifier_info *info) +{ + struct net_device *dev = netdev_notifier_info_to_dev(info); + + ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL); +} + +static int ethnl_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + switch (event) { + case NETDEV_FEAT_CHANGE: + ethnl_notify_features(ptr); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block ethnl_netdev_notifier = { + .notifier_call = ethnl_netdev_event, +}; + /* genetlink setup */ static const struct genl_ops ethtool_genl_ops[] = { @@ -695,6 +730,54 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_set_wol, }, + { + .cmd = ETHTOOL_MSG_FEATURES_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, + { + .cmd = ETHTOOL_MSG_FEATURES_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_features, + }, + { + .cmd = ETHTOOL_MSG_PRIVFLAGS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, + { + .cmd = ETHTOOL_MSG_PRIVFLAGS_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_privflags, + }, + { + .cmd = ETHTOOL_MSG_RINGS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, + { + .cmd = ETHTOOL_MSG_RINGS_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_rings, + }, + { + .cmd = ETHTOOL_MSG_CHANNELS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, + { + .cmd = ETHTOOL_MSG_CHANNELS_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_channels, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { @@ -723,7 +806,9 @@ static int __init ethnl_init(void) return ret; ethnl_ok = true; - return 0; + ret = register_netdevice_notifier(ðnl_netdev_notifier); + WARN(ret < 0, "ethtool: net device notifier registration failed"); + return ret; } subsys_initcall(ethnl_init); diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 60efd87686ad..45aad99a6021 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -10,9 +10,10 @@ struct ethnl_req_info; -int ethnl_parse_header(struct ethnl_req_info *req_info, - const struct nlattr *nest, struct net *net, - struct netlink_ext_ack *extack, bool require_dev); +int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, + const struct nlattr *nest, struct net *net, + struct netlink_ext_ack *extack, + bool require_dev); int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev, u16 attrtype); struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd, @@ -336,10 +337,18 @@ extern const struct ethnl_request_ops ethnl_linkmodes_request_ops; extern const struct ethnl_request_ops ethnl_linkstate_request_ops; extern const struct ethnl_request_ops ethnl_debug_request_ops; extern const struct ethnl_request_ops ethnl_wol_request_ops; +extern const struct ethnl_request_ops ethnl_features_request_ops; +extern const struct ethnl_request_ops ethnl_privflags_request_ops; +extern const struct ethnl_request_ops ethnl_rings_request_ops; +extern const struct ethnl_request_ops ethnl_channels_request_ops; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info); int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c new file mode 100644 index 000000000000..e8f03b33db9b --- /dev/null +++ b/net/ethtool/privflags.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" +#include "bitset.h" + +struct privflags_req_info { + struct ethnl_req_info base; +}; + +struct privflags_reply_data { + struct ethnl_reply_data base; + const char (*priv_flag_names)[ETH_GSTRING_LEN]; + unsigned int n_priv_flags; + u32 priv_flags; +}; + +#define PRIVFLAGS_REPDATA(__reply_base) \ + container_of(__reply_base, struct privflags_reply_data, base) + +static const struct nla_policy +privflags_get_policy[ETHTOOL_A_PRIVFLAGS_MAX + 1] = { + [ETHTOOL_A_PRIVFLAGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_PRIVFLAGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_PRIVFLAGS_FLAGS] = { .type = NLA_REJECT }, +}; + +static int ethnl_get_priv_flags_info(struct net_device *dev, + unsigned int *count, + const char (**names)[ETH_GSTRING_LEN]) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + int nflags; + + nflags = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); + if (nflags < 0) + return nflags; + + if (names) { + *names = kcalloc(nflags, ETH_GSTRING_LEN, GFP_KERNEL); + if (!*names) + return -ENOMEM; + ops->get_strings(dev, ETH_SS_PRIV_FLAGS, (u8 *)*names); + } + + /* We can pass more than 32 private flags to userspace via netlink but + * we cannot get more with ethtool_ops::get_priv_flags(). Note that we + * must not adjust nflags before allocating the space for flag names + * as the buffer must be large enough for all flags. + */ + if (WARN_ONCE(nflags > 32, + "device %s reports more than 32 private flags (%d)\n", + netdev_name(dev), nflags)) + nflags = 32; + *count = nflags; + + return 0; +} + +static int privflags_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + const char (*names)[ETH_GSTRING_LEN]; + const struct ethtool_ops *ops; + unsigned int nflags; + int ret; + + ops = dev->ethtool_ops; + if (!ops->get_priv_flags || !ops->get_sset_count || !ops->get_strings) + return -EOPNOTSUPP; + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + + ret = ethnl_get_priv_flags_info(dev, &nflags, &names); + if (ret < 0) + goto out_ops; + data->priv_flags = ops->get_priv_flags(dev); + data->priv_flag_names = names; + data->n_priv_flags = nflags; + +out_ops: + ethnl_ops_complete(dev); + return ret; +} + +static int privflags_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const u32 all_flags = ~(u32)0 >> (32 - data->n_priv_flags); + + return ethnl_bitset32_size(&data->priv_flags, &all_flags, + data->n_priv_flags, + data->priv_flag_names, compact); +} + +static int privflags_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const u32 all_flags = ~(u32)0 >> (32 - data->n_priv_flags); + + return ethnl_put_bitset32(skb, ETHTOOL_A_PRIVFLAGS_FLAGS, + &data->priv_flags, &all_flags, + data->n_priv_flags, data->priv_flag_names, + compact); +} + +static void privflags_cleanup_data(struct ethnl_reply_data *reply_data) +{ + struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_data); + + kfree(data->priv_flag_names); +} + +const struct ethnl_request_ops ethnl_privflags_request_ops = { + .request_cmd = ETHTOOL_MSG_PRIVFLAGS_GET, + .reply_cmd = ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, + .hdr_attr = ETHTOOL_A_PRIVFLAGS_HEADER, + .max_attr = ETHTOOL_A_PRIVFLAGS_MAX, + .req_info_size = sizeof(struct privflags_req_info), + .reply_data_size = sizeof(struct privflags_reply_data), + .request_policy = privflags_get_policy, + + .prepare_data = privflags_prepare_data, + .reply_size = privflags_reply_size, + .fill_reply = privflags_fill_reply, + .cleanup_data = privflags_cleanup_data, +}; + +/* PRIVFLAGS_SET */ + +static const struct nla_policy +privflags_set_policy[ETHTOOL_A_PRIVFLAGS_MAX + 1] = { + [ETHTOOL_A_PRIVFLAGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_PRIVFLAGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_PRIVFLAGS_FLAGS] = { .type = NLA_NESTED }, +}; + +int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[ETHTOOL_A_PRIVFLAGS_MAX + 1]; + const char (*names)[ETH_GSTRING_LEN] = NULL; + struct ethnl_req_info req_info = {}; + const struct ethtool_ops *ops; + struct net_device *dev; + unsigned int nflags; + bool mod = false; + bool compact; + u32 flags; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_PRIVFLAGS_MAX, privflags_set_policy, + info->extack); + if (ret < 0) + return ret; + if (!tb[ETHTOOL_A_PRIVFLAGS_FLAGS]) + return -EINVAL; + ret = ethnl_bitset_is_compact(tb[ETHTOOL_A_PRIVFLAGS_FLAGS], &compact); + if (ret < 0) + return ret; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_PRIVFLAGS_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + ops = dev->ethtool_ops; + if (!ops->get_priv_flags || !ops->set_priv_flags || + !ops->get_sset_count || !ops->get_strings) + return -EOPNOTSUPP; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + ret = ethnl_get_priv_flags_info(dev, &nflags, compact ? NULL : &names); + if (ret < 0) + goto out_ops; + flags = ops->get_priv_flags(dev); + + ret = ethnl_update_bitset32(&flags, nflags, + tb[ETHTOOL_A_PRIVFLAGS_FLAGS], names, + info->extack, &mod); + if (ret < 0 || !mod) + goto out_free; + ret = ops->set_priv_flags(dev, flags); + if (ret < 0) + goto out_free; + ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF, NULL); + +out_free: + kfree(names); +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); + dev_put(dev); + return ret; +} diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c new file mode 100644 index 000000000000..5422526f4eef --- /dev/null +++ b/net/ethtool/rings.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" + +struct rings_req_info { + struct ethnl_req_info base; +}; + +struct rings_reply_data { + struct ethnl_reply_data base; + struct ethtool_ringparam ringparam; +}; + +#define RINGS_REPDATA(__reply_base) \ + container_of(__reply_base, struct rings_reply_data, base) + +static const struct nla_policy +rings_get_policy[ETHTOOL_A_RINGS_MAX + 1] = { + [ETHTOOL_A_RINGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_RINGS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_MINI_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_JUMBO_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_MINI] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_TX] = { .type = NLA_REJECT }, +}; + +static int rings_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct rings_reply_data *data = RINGS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + int ret; + + if (!dev->ethtool_ops->get_ringparam) + return -EOPNOTSUPP; + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + dev->ethtool_ops->get_ringparam(dev, &data->ringparam); + ethnl_ops_complete(dev); + + return 0; +} + +static int rings_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + return nla_total_size(sizeof(u32)) + /* _RINGS_RX_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_TX_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */ + nla_total_size(sizeof(u32)); /* _RINGS_TX */ +} + +static int rings_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct rings_reply_data *data = RINGS_REPDATA(reply_base); + const struct ethtool_ringparam *ringparam = &data->ringparam; + + if ((ringparam->rx_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MAX, + ringparam->rx_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_RX, + ringparam->rx_pending))) || + (ringparam->rx_mini_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MINI_MAX, + ringparam->rx_mini_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MINI, + ringparam->rx_mini_pending))) || + (ringparam->rx_jumbo_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_JUMBO_MAX, + ringparam->rx_jumbo_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_RX_JUMBO, + ringparam->rx_jumbo_pending))) || + (ringparam->tx_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_MAX, + ringparam->tx_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_TX, + ringparam->tx_pending)))) + return -EMSGSIZE; + + return 0; +} + +const struct ethnl_request_ops ethnl_rings_request_ops = { + .request_cmd = ETHTOOL_MSG_RINGS_GET, + .reply_cmd = ETHTOOL_MSG_RINGS_GET_REPLY, + .hdr_attr = ETHTOOL_A_RINGS_HEADER, + .max_attr = ETHTOOL_A_RINGS_MAX, + .req_info_size = sizeof(struct rings_req_info), + .reply_data_size = sizeof(struct rings_reply_data), + .request_policy = rings_get_policy, + + .prepare_data = rings_prepare_data, + .reply_size = rings_reply_size, + .fill_reply = rings_fill_reply, +}; + +/* RINGS_SET */ + +static const struct nla_policy +rings_set_policy[ETHTOOL_A_RINGS_MAX + 1] = { + [ETHTOOL_A_RINGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_RINGS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_MINI_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_JUMBO_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_RX_MINI] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, +}; + +int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[ETHTOOL_A_RINGS_MAX + 1]; + struct ethtool_ringparam ringparam = {}; + struct ethnl_req_info req_info = {}; + const struct nlattr *err_attr; + const struct ethtool_ops *ops; + struct net_device *dev; + bool mod = false; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_RINGS_MAX, rings_set_policy, + info->extack); + if (ret < 0) + return ret; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_RINGS_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + ops = dev->ethtool_ops; + ret = -EOPNOTSUPP; + if (!ops->get_ringparam || !ops->set_ringparam) + goto out_dev; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + ops->get_ringparam(dev, &ringparam); + + ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); + ethnl_update_u32(&ringparam.rx_mini_pending, + tb[ETHTOOL_A_RINGS_RX_MINI], &mod); + ethnl_update_u32(&ringparam.rx_jumbo_pending, + tb[ETHTOOL_A_RINGS_RX_JUMBO], &mod); + ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod); + ret = 0; + if (!mod) + goto out_ops; + + /* ensure new ring parameters are within limits */ + if (ringparam.rx_pending > ringparam.rx_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_RX]; + else if (ringparam.rx_mini_pending > ringparam.rx_mini_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_RX_MINI]; + else if (ringparam.rx_jumbo_pending > ringparam.rx_jumbo_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_RX_JUMBO]; + else if (ringparam.tx_pending > ringparam.tx_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_TX]; + else + err_attr = NULL; + if (err_attr) { + ret = -EINVAL; + NL_SET_ERR_MSG_ATTR(info->extack, err_attr, + "requested ring size exceeds maximum"); + goto out_ops; + } + + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam); + if (ret < 0) + goto out_ops; + ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL); + +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); +out_dev: + dev_put(dev); + return ret; +} diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c index e1b8a65b64c4..1d2bcabee554 100644 --- a/net/ethtool/wol.c +++ b/net/ethtool/wol.c @@ -123,8 +123,9 @@ int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info) wol_set_policy, info->extack); if (ret < 0) return ret; - ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_WOL_HEADER], - genl_info_net(info), info->extack, true); + ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_WOL_HEADER], + genl_info_net(info), info->extack, + true); if (ret < 0) return ret; dev = req_info.dev; diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 2c7a38d76a3a..0672b2f01586 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -21,7 +21,13 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, }, [IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BCN_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_SF_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_PAN_COORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, }, + [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, }, [IEEE802154_ATTR_PAGE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_DEV_TYPE] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, }, [IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, }, diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9d97bace13c8..9e1a186a3671 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o +obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2fe295432c24..bd7b4e92e07f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -872,7 +872,7 @@ int inet_shutdown(struct socket *sock, int how) err = -ENOTCONN; /* Hack to wake up other listeners, who can poll for EPOLLHUP, even on eg. unconnected UDP sockets -- RR */ - /* fall through */ + fallthrough; default: sk->sk_shutdown |= how; if (sk->sk_prot->shutdown) @@ -886,7 +886,7 @@ int inet_shutdown(struct socket *sock, int how) case TCP_LISTEN: if (!(how & RCV_SHUTDOWN)) break; - /* fall through */ + fallthrough; case TCP_SYN_SENT: err = sk->sk_prot->disconnect(sk, O_NONBLOCK); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 974179b3b314..d99e1be94019 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -107,7 +107,7 @@ static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) if (optlen < 6) return -EINVAL; memcpy(daddr, optptr+optlen-4, 4); - /* Fall through */ + fallthrough; default: memset(optptr, 0, optlen); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 05eb42f347e8..687971d83b4e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1181,7 +1181,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSARP: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - /* fall through */ + fallthrough; case SIOCGARP: err = copy_from_user(&r, arg, sizeof(struct arpreq)); if (err) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e4632bd2026d..30fa42f5997d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1566,11 +1566,11 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, } } ip_mc_up(in_dev); - /* fall through */ + fallthrough; case NETDEV_CHANGEADDR: if (!IN_DEV_ARP_NOTIFY(in_dev)) break; - /* fall through */ + fallthrough; case NETDEV_NOTIFY_PEERS: /* Send gratuitous ARP to notify of link change */ inetdev_send_gratuitous_arp(dev, in_dev); @@ -1588,7 +1588,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (inetdev_valid_mtu(dev->mtu)) break; /* disable IP when MTU is not enough */ - /* fall through */ + fallthrough; case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a803cdd9400a..e4c62b8f57a8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1962,7 +1962,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) case NETDEV_DOWN: case NETDEV_UNREGISTER: nexthop_nh->fib_nh_flags |= RTNH_F_DEAD; - /* fall through */ + fallthrough; case NETDEV_CHANGE: nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN; break; @@ -1984,7 +1984,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) case NETDEV_DOWN: case NETDEV_UNREGISTER: fi->fib_flags |= RTNH_F_DEAD; - /* fall through */ + fallthrough; case NETDEV_CHANGE: fi->fib_flags |= RTNH_F_LINKDOWN; break; diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 5fd6e8ed02b5..66fdbfe5447c 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -56,7 +56,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); -/* Fills in tpi and returns header length to be pulled. */ +/* Fills in tpi and returns header length to be pulled. + * Note that caller must use pskb_may_pull() before pulling GRE header. + */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs) { @@ -110,8 +112,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + u8 _val, *val; + + val = skb_header_pointer(skb, nhs + hdr_len, + sizeof(_val), &_val); + if (!val) + return -EINVAL; tpi->proto = proto; - if ((*(u8 *)options & 0xF0) != 0x40) + if ((*val & 0xF0) != 0x40) hdr_len += 4; } tpi->hdr_len = hdr_len; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f369e7ce685b..fc61f51d87a3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb) case 3: if (!icmp_tag_validation(iph->protocol)) goto out; - /* fall through */ + fallthrough; case 0: info = ntohs(icmph->un.frag.mtu); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a4db79b1b643..5f34eb951627 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -131,7 +131,7 @@ static int inet_csk_bind_conflict(const struct sock *sk, { struct sock *sk2; bool reuse = sk->sk_reuse; - bool reuseport = !!sk->sk_reuseport && reuseport_ok; + bool reuseport = !!sk->sk_reuseport; kuid_t uid = sock_i_uid((struct sock *)sk); /* @@ -146,17 +146,21 @@ static int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if ((!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - (!reuseport || !sk2->sk_reuseport || - rcu_access_pointer(sk->sk_reuseport_cb) || - (sk2->sk_state != TCP_TIME_WAIT && - !uid_eq(uid, sock_i_uid(sk2))))) { - if (inet_rcv_saddr_equal(sk, sk2, true)) - break; - } - if (!relax && reuse && sk2->sk_reuse && + if (reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { + if ((!relax || + (!reuseport_ok && + reuseport && sk2->sk_reuseport && + !rcu_access_pointer(sk->sk_reuseport_cb) && + (sk2->sk_state == TCP_TIME_WAIT || + uid_eq(uid, sock_i_uid(sk2))))) && + inet_rcv_saddr_equal(sk, sk2, true)) + break; + } else if (!reuseport_ok || + !reuseport || !sk2->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, sock_i_uid(sk2)))) { if (inet_rcv_saddr_equal(sk, sk2, true)) break; } @@ -176,12 +180,14 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int * int port = 0; struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); + bool relax = false; int i, low, high, attempt_half; struct inet_bind_bucket *tb; u32 remaining, offset; int l3mdev; l3mdev = inet_sk_bound_l3mdev(sk); +ports_exhausted: attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; other_half_scan: inet_get_local_port_range(net, &low, &high); @@ -219,7 +225,7 @@ other_parity_scan: inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && tb->port == port) { - if (!inet_csk_bind_conflict(sk, tb, false, false)) + if (!inet_csk_bind_conflict(sk, tb, relax, false)) goto success; goto next_port; } @@ -239,6 +245,12 @@ next_port: attempt_half = 2; goto other_half_scan; } + + if (net->ipv4.sysctl_ip_autobind_reuse && !relax) { + /* We still have a chance to connect to different destinations */ + relax = true; + goto ports_exhausted; + } return NULL; success: *port_ret = port; @@ -482,8 +494,28 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } + out: release_sock(sk); + if (newsk && mem_cgroup_sockets_enabled) { + int amt; + + /* atomically get the memory usage, set and charge the + * newsk->sk_memcg. + */ + lock_sock(newsk); + + /* The socket has not been accepted yet, no need to look at + * newsk->sk_wmem_queued. + */ + amt = sk_mem_pages(newsk->sk_forward_alloc + + atomic_read(&newsk->sk_rmem_alloc)); + mem_cgroup_sk_alloc(newsk); + if (newsk->sk_memcg && amt) + mem_cgroup_charge_skmem(newsk->sk_memcg, amt); + + release_sock(newsk); + } if (req) reqsk_put(req); return newsk; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e1cad25909df..5d50aad3cdbf 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -101,13 +101,9 @@ static size_t inet_sk_attr_size(struct sock *sk, aux = handler->idiag_get_aux_size(sk, net_admin); return nla_total_size(sizeof(struct tcp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + nla_total_size(TCP_CA_NAME_MAX) + nla_total_size(sizeof(struct tcpvegas_info)) @@ -148,6 +144,24 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) goto errout; + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { + u32 classid = 0; + +#ifdef CONFIG_SOCK_CGROUP_DATA + classid = sock_cgroup_classid(&sk->sk_cgrp_data); +#endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; + + if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) + goto errout; + } + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); @@ -289,24 +303,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || - ext & (1 << (INET_DIAG_TCLASS - 1))) { - u32 classid = 0; - -#ifdef CONFIG_SOCK_CGROUP_DATA - classid = sock_cgroup_classid(&sk->sk_cgrp_data); -#endif - /* Fallback to socket priority if class id isn't set. - * Classful qdiscs use it as direct reference to class. - * For cgroup2 classid is always zero. - */ - if (!classid) - classid = sk->sk_priority; - - if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) - goto errout; - } - /* Keep it at the end for potential retry with a larger skb, * or else do best-effort fitting, which is only done for the * first_nlmsg. diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d84819893db9..aaaaf907e0d8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -333,7 +333,7 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk, switch (ret) { case NET_XMIT_CN: do_cn = true; - /* fall through */ + fallthrough; case NET_XMIT_SUCCESS: break; default: diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6e68def66822..9cf83cc85e4a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1465,7 +1465,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; - /* fall through */ + fallthrough; case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) { diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f1f78a742b36..b167f4a5b684 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1057,7 +1057,7 @@ struct compat_arpt_replace { u32 underflow[NF_ARP_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; - struct compat_arpt_entry entries[0]; + struct compat_arpt_entry entries[]; }; static inline void compat_release_entry(struct compat_arpt_entry *e) @@ -1383,7 +1383,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, struct compat_arpt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; - struct compat_arpt_entry entrytable[0]; + struct compat_arpt_entry entrytable[]; }; static int compat_get_entries(struct net *net, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 10b91ebdf213..c2670eaa74e6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1211,7 +1211,7 @@ struct compat_ipt_replace { u32 underflow[NF_INET_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; /* struct xt_counters * */ - struct compat_ipt_entry entries[0]; + struct compat_ipt_entry entries[]; }; static int @@ -1562,7 +1562,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, struct compat_ipt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; - struct compat_ipt_entry entrytable[0]; + struct compat_ipt_entry entrytable[]; }; static int diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 4b2d49cc9f1a..0c72156130b6 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -173,7 +173,7 @@ static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m, case ICMP_REDIRECT: /* Max length: 24 "GATEWAY=255.255.255.255 " */ nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); - /* Fall through */ + fallthrough; case ICMP_DEST_UNREACH: case ICMP_SOURCE_QUENCH: case ICMP_TIME_EXCEEDED: diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index b2aeb7bf5dac..3c25a467b3ef 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -168,7 +168,7 @@ pptp_outbound_pkt(struct sk_buff *skb, pr_debug("unknown outbound packet 0x%04x:%s\n", msg, msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0]); - /* fall through */ + fallthrough; case PPTP_SET_LINK_INFO: /* only need to NAT in case PAC is behind NAT box */ case PPTP_START_SESSION_REQUEST: @@ -271,7 +271,7 @@ pptp_inbound_pkt(struct sk_buff *skb, pr_debug("unknown inbound packet %s\n", msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0]); - /* fall through */ + fallthrough; case PPTP_START_SESSION_REQUEST: case PPTP_START_SESSION_REPLY: case PPTP_STOP_SESSION_REQUEST: diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d072c326dd64..fdfca534d094 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1327,7 +1327,7 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, case AF_UNSPEC: if (tb[NHA_GROUP]) break; - /* fallthrough */ + fallthrough; default: NL_SET_ERR_MSG(extack, "Invalid address family"); goto out; diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index d19cce39be1b..1b5b8af27aaf 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -101,8 +101,9 @@ static int raw_diag_dump_one(struct netlink_callback *cb, if (IS_ERR(sk)) return PTR_ERR(sk); - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) { sock_put(sk); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 042599cc691d..788c69d9bfe0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1621,12 +1621,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, - bool nopolicy, bool noxfrm, bool will_cache) + bool nopolicy, bool noxfrm) { struct rtable *rt; rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - (will_cache ? 0 : DST_HOST) | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); @@ -1674,7 +1673,6 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) new_rt->rt_gw6 = rt->rt_gw6; INIT_LIST_HEAD(&new_rt->rt_uncached); - new_rt->dst.flags |= DST_HOST; new_rt->dst.input = rt->dst.input; new_rt->dst.output = rt->dst.output; new_rt->dst.error = rt->dst.error; @@ -1734,7 +1732,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, flags |= RTCF_LOCAL; rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) return -ENOBUFS; @@ -1851,7 +1849,7 @@ static int __mkroute_input(struct sk_buff *skb, rth = rt_dst_alloc(out_dev->dev, 0, res->type, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); + IN_DEV_CONF_GET(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; goto cleanup; @@ -2219,7 +2217,7 @@ local_input: rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, flags | RTCF_LOCAL, res->type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; @@ -2443,8 +2441,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, add: rth = rt_dst_alloc(dev_out, flags, type, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(in_dev, NOXFRM), - do_cache); + IN_DEV_CONF_GET(in_dev, NOXFRM)); if (!rth) return ERR_PTR(-ENOBUFS); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d9531b4b33f2..81b267e990a1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -764,6 +764,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { + .procname = "ip_autobind_reuse", + .data = &init_net.ipv4.sysctl_ip_autobind_reuse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { .procname = "fwmark_reflect", .data = &init_net.ipv4.sysctl_fwmark_reflect, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b7134f76f840..5c57850fab4b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2251,7 +2251,7 @@ void tcp_set_state(struct sock *sk, int state) if (inet_csk(sk)->icsk_bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(sk); - /* fall through */ + fallthrough; default: if (oldstate == TCP_ESTABLISHED) TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 645cc3009e64..f5f588b1f6e9 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -145,12 +145,13 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); - else { - bictcp_update(ca, tp->snd_cwnd); - tcp_cong_avoid_ai(tp, ca->cnt, 1); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; } + bictcp_update(ca, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, ca->cnt, acked); } /* diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 7d6e1b75d4d4..fe7b4fbc31c1 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -528,57 +528,7 @@ out_err: return copied ? copied : err; } -static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock) -{ - struct sk_psock_link *link; - - while ((link = sk_psock_link_pop(psock))) { - sk_psock_unlink(sk, link); - sk_psock_free_link(link); - } -} - -static void tcp_bpf_unhash(struct sock *sk) -{ - void (*saved_unhash)(struct sock *sk); - struct sk_psock *psock; - - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - if (sk->sk_prot->unhash) - sk->sk_prot->unhash(sk); - return; - } - - saved_unhash = psock->saved_unhash; - tcp_bpf_remove(sk, psock); - rcu_read_unlock(); - saved_unhash(sk); -} - -static void tcp_bpf_close(struct sock *sk, long timeout) -{ - void (*saved_close)(struct sock *sk, long timeout); - struct sk_psock *psock; - - lock_sock(sk); - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - release_sock(sk); - return sk->sk_prot->close(sk, timeout); - } - - saved_close = psock->saved_close; - tcp_bpf_remove(sk, psock); - rcu_read_unlock(); - release_sock(sk); - saved_close(sk, timeout); -} - +#ifdef CONFIG_BPF_STREAM_PARSER enum { TCP_BPF_IPV4, TCP_BPF_IPV6, @@ -599,8 +549,8 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], struct proto *base) { prot[TCP_BPF_BASE] = *base; - prot[TCP_BPF_BASE].unhash = tcp_bpf_unhash; - prot[TCP_BPF_BASE].close = tcp_bpf_close; + prot[TCP_BPF_BASE].unhash = sock_map_unhash; + prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read; @@ -629,28 +579,6 @@ static int __init tcp_bpf_v4_build_proto(void) } core_initcall(tcp_bpf_v4_build_proto); -static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock) -{ - int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; - int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; - - sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]); -} - -static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock) -{ - int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; - int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; - - /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed - * or added requiring sk_prot hook updates. We keep original saved - * hooks in this case. - * - * Pairs with lockless read in sk_clone_lock(). - */ - WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]); -} - static int tcp_bpf_assert_proto_ops(struct proto *ops) { /* In order to avoid retpoline, we make assumptions when we call @@ -662,36 +590,21 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops) ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP; } -void tcp_bpf_reinit(struct sock *sk) +struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) { - struct sk_psock *psock; - - sock_owned_by_me(sk); - - rcu_read_lock(); - psock = sk_psock(sk); - tcp_bpf_reinit_sk_prot(sk, psock); - rcu_read_unlock(); -} + int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; + int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; -int tcp_bpf_init(struct sock *sk) -{ - struct proto *ops = READ_ONCE(sk->sk_prot); - struct sk_psock *psock; + if (!psock->sk_proto) { + struct proto *ops = READ_ONCE(sk->sk_prot); - sock_owned_by_me(sk); + if (tcp_bpf_assert_proto_ops(ops)) + return ERR_PTR(-EINVAL); - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock || psock->sk_proto || - tcp_bpf_assert_proto_ops(ops))) { - rcu_read_unlock(); - return -EINVAL; + tcp_bpf_check_v6_needs_rebuild(sk, ops); } - tcp_bpf_check_v6_needs_rebuild(sk, ops); - tcp_bpf_update_sk_prot(sk, psock); - rcu_read_unlock(); - return 0; + + return &tcp_bpf_prots[family][config]; } /* If a child got cloned from a listening socket that had tcp_bpf @@ -707,3 +620,4 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE]) newsk->sk_prot = sk->sk_prot_creator; } +#endif /* CONFIG_BPF_STREAM_PARSER */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6b6b57000dad..bf4ced9273e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2865,7 +2865,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, (*ack_flag & FLAG_LOST_RETRANS))) return; /* Change state if cwnd is undone or retransmits are lost */ - /* fall through */ + fallthrough; default: if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) @@ -6367,7 +6367,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) mptcp_incoming_options(sk, skb, &tp->rx_opt); break; } - /* fall through */ + fallthrough; case TCP_FIN_WAIT1: case TCP_FIN_WAIT2: /* RFC 793 says to queue data in these states, @@ -6382,7 +6382,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 1; } } - /* Fall through */ + fallthrough; case TCP_ESTABLISHED: tcp_data_queue(sk, skb); queued = 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 52acf0bc2ee5..83a5d24e13b8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2072,7 +2072,7 @@ do_time_wait: } } /* to ACK */ - /* fall through */ + fallthrough; case TCP_TW_ACK: tcp_v4_timewait_ack(sk, skb); break; @@ -2368,7 +2368,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq) break; st->bucket = 0; st->state = TCP_SEQ_STATE_ESTABLISHED; - /* Fallthrough */ + fallthrough; case TCP_SEQ_STATE_ESTABLISHED: if (st->bucket > tcp_hashinfo.ehash_mask) break; diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 471571e1ab26..6cebf412d590 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -10,10 +10,9 @@ #include <net/tcp.h> /* These factors derived from the recommended values in the aer: - * .01 and and 7/8. We use 50 instead of 100 to account for - * delayed ack. + * .01 and and 7/8. */ -#define TCP_SCALABLE_AI_CNT 50U +#define TCP_SCALABLE_AI_CNT 100U #define TCP_SCALABLE_MD_SCALE 3 static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) @@ -23,11 +22,13 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); - else - tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), - 1); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; + } + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + acked); } static u32 tcp_scalable_ssthresh(struct sock *sk) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 2703f24c5d1a..7c27aa629af1 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -105,13 +105,6 @@ void tcp_update_ulp(struct sock *sk, struct proto *proto, { struct inet_connection_sock *icsk = inet_csk(sk); - if (!icsk->icsk_ulp_ops) { - sk->sk_write_space = write_space; - /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, proto); - return; - } - if (icsk->icsk_ulp_ops->update) icsk->icsk_ulp_ops->update(sk, proto, write_space); } diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 3b36bb1a0dda..50a9a6e2c4cd 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -153,31 +153,34 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd; if (tcp_in_slow_start(tp)) { - /* Slow start. */ - tcp_slow_start(tp, acked); + /* Slow start. */ + acked = tcp_slow_start(tp, acked); + if (!acked) + goto done; + } + + /* Congestion avoidance. */ + if (veno->diff < beta) { + /* In the "non-congestive state", increase cwnd + * every rtt. + */ + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); } else { - /* Congestion avoidance. */ - if (veno->diff < beta) { - /* In the "non-congestive state", increase cwnd - * every rtt. - */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); - } else { - /* In the "congestive state", increase cwnd - * every other rtt. - */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - if (veno->inc && - tp->snd_cwnd < tp->snd_cwnd_clamp) { - tp->snd_cwnd++; - veno->inc = 0; - } else - veno->inc = 1; - tp->snd_cwnd_cnt = 0; + /* In the "congestive state", increase cwnd + * every other rtt. + */ + if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (veno->inc && + tp->snd_cwnd < tp->snd_cwnd_clamp) { + tp->snd_cwnd++; + veno->inc = 0; } else - tp->snd_cwnd_cnt++; - } + veno->inc = 1; + tp->snd_cwnd_cnt = 0; + } else + tp->snd_cwnd_cnt += acked; } +done: if (tp->snd_cwnd < 2) tp->snd_cwnd = 2; else if (tp->snd_cwnd > tp->snd_cwnd_clamp) diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index e00570dd0a69..3bb448761ca3 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -36,8 +36,6 @@ struct yeah { u32 reno_count; u32 fast_count; - - u32 pkts_acked; }; static void tcp_yeah_init(struct sock *sk) @@ -57,18 +55,6 @@ static void tcp_yeah_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void tcp_yeah_pkts_acked(struct sock *sk, - const struct ack_sample *sample) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct yeah *yeah = inet_csk_ca(sk); - - if (icsk->icsk_ca_state == TCP_CA_Open) - yeah->pkts_acked = sample->pkts_acked; - - tcp_vegas_pkts_acked(sk, sample); -} - static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -77,24 +63,19 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + goto do_vegas; + } - else if (!yeah->doing_reno_now) { + if (!yeah->doing_reno_now) { /* Scalable */ - - tp->snd_cwnd_cnt += yeah->pkts_acked; - if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } - - yeah->pkts_acked = 1; - + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + acked); } else { /* Reno */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); } /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. @@ -118,7 +99,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) * of bytes we send in an RTT is often less than our cwnd will allow. * So we keep track of our cwnd separately, in v_beg_snd_cwnd. */ - +do_vegas: if (after(ack, yeah->vegas.beg_snd_nxt)) { /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -232,7 +213,7 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = { .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, - .pkts_acked = tcp_yeah_pkts_acked, + .pkts_acked = tcp_vegas_pkts_acked, .owner = THIS_MODULE, .name = "yeah", diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a68e2ac37f26..2633fc231593 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2563,7 +2563,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case UDP_ENCAP_ESPINUDP_NON_IKE: up->encap_rcv = xfrm4_udp_encap_rcv; #endif - /* FALLTHROUGH */ + fallthrough; case UDP_ENCAP_L2TPINUDP: up->encap_type = val; lock_sock(sk); diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c new file mode 100644 index 000000000000..eddd973e6575 --- /dev/null +++ b/net/ipv4/udp_bpf.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Cloudflare Ltd https://cloudflare.com */ + +#include <linux/skmsg.h> +#include <net/sock.h> +#include <net/udp.h> + +enum { + UDP_BPF_IPV4, + UDP_BPF_IPV6, + UDP_BPF_NUM_PROTS, +}; + +static struct proto *udpv6_prot_saved __read_mostly; +static DEFINE_SPINLOCK(udpv6_prot_lock); +static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS]; + +static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base) +{ + *prot = *base; + prot->unhash = sock_map_unhash; + prot->close = sock_map_close; +} + +static void udp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops) +{ + if (sk->sk_family == AF_INET6 && + unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) { + spin_lock_bh(&udpv6_prot_lock); + if (likely(ops != udpv6_prot_saved)) { + udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV6], ops); + smp_store_release(&udpv6_prot_saved, ops); + } + spin_unlock_bh(&udpv6_prot_lock); + } +} + +static int __init udp_bpf_v4_build_proto(void) +{ + udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot); + return 0; +} +core_initcall(udp_bpf_v4_build_proto); + +struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) +{ + int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6; + + if (!psock->sk_proto) + udp_bpf_check_v6_needs_rebuild(sk, READ_ONCE(sk->sk_prot)); + + return &udp_bpf_prots[family]; +} diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 93884696abdd..1dbece34496e 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -63,8 +63,9 @@ static int udp_dump_one(struct udp_table *tbl, goto out; err = -ENOMEM; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) goto out; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cb493e15959c..5b9de773ce73 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1226,11 +1226,13 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) } static void -cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt) +cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, + bool del_rt, bool del_peer) { struct fib6_info *f6i; - f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, + f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr, + ifp->prefix_len, ifp->idev->dev, 0, RTF_DEFAULT, true); if (f6i) { if (del_rt) @@ -1293,7 +1295,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (action != CLEANUP_PREFIX_RT_NOP) { cleanup_prefix_route(ifp, expires, - action == CLEANUP_PREFIX_RT_DEL); + action == CLEANUP_PREFIX_RT_DEL, false); } /* clean up prefsrc entries */ @@ -3299,7 +3301,7 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) switch (idev->cnf.addr_gen_mode) { case IN6_ADDR_GEN_MODE_RANDOM: ipv6_gen_mode_random_init(idev); - /* fallthrough */ + fallthrough; case IN6_ADDR_GEN_MODE_STABLE_PRIVACY: if (!ipv6_generate_stable_address(&addr, 0, idev)) addrconf_add_linklocal(idev, &addr, @@ -3345,6 +3347,10 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_NONE) && (dev->type != ARPHRD_RAWIP)) { /* Alas, we support only Ethernet autoconfiguration. */ + idev = __in6_dev_get(dev); + if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP && + dev->flags & IFF_MULTICAST) + ipv6_mc_up(idev); return; } @@ -3517,9 +3523,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; run_pending = 1; - - /* fall through */ - + fallthrough; case NETDEV_UP: case NETDEV_CHANGE: if (dev->flags & IFF_SLAVE) @@ -4586,12 +4590,14 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, } static int modify_prefix_route(struct inet6_ifaddr *ifp, - unsigned long expires, u32 flags) + unsigned long expires, u32 flags, + bool modify_peer) { struct fib6_info *f6i; u32 prio; - f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, + f6i = addrconf_get_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr, + ifp->prefix_len, ifp->idev->dev, 0, RTF_DEFAULT, true); if (!f6i) return -ENOENT; @@ -4602,7 +4608,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, ip6_del_rt(dev_net(ifp->idev->dev), f6i); /* add new one */ - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, + addrconf_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr, + ifp->prefix_len, ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); } else { @@ -4624,6 +4631,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) unsigned long timeout; bool was_managetempaddr; bool had_prefixroute; + bool new_peer = false; ASSERT_RTNL(); @@ -4655,6 +4663,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) cfg->preferred_lft = timeout; } + if (cfg->peer_pfx && + memcmp(&ifp->peer_addr, cfg->peer_pfx, sizeof(struct in6_addr))) { + if (!ipv6_addr_any(&ifp->peer_addr)) + cleanup_prefix_route(ifp, expires, true, true); + new_peer = true; + } + spin_lock_bh(&ifp->lock); was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR; had_prefixroute = ifp->flags & IFA_F_PERMANENT && @@ -4670,6 +4685,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority) ifp->rt_priority = cfg->rt_priority; + if (new_peer) + ifp->peer_addr = *cfg->peer_pfx; + spin_unlock_bh(&ifp->lock); if (!(ifp->flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ifp); @@ -4678,7 +4696,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) int rc = -ENOENT; if (had_prefixroute) - rc = modify_prefix_route(ifp, expires, flags); + rc = modify_prefix_route(ifp, expires, flags, false); /* prefix route could have been deleted; if so restore it */ if (rc == -ENOENT) { @@ -4686,6 +4704,15 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); } + + if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr)) + rc = modify_prefix_route(ifp, expires, flags, true); + + if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) { + addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len, + ifp->rt_priority, ifp->idev->dev, + expires, flags, GFP_KERNEL); + } } else if (had_prefixroute) { enum cleanup_prefix_rt_t action; unsigned long rt_expires; @@ -4696,7 +4723,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) if (action != CLEANUP_PREFIX_RT_NOP) { cleanup_prefix_route(ifp, rt_expires, - action == CLEANUP_PREFIX_RT_DEL); + action == CLEANUP_PREFIX_RT_DEL, false); } } @@ -5983,9 +6010,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); if (!ipv6_addr_any(&ifp->peer_addr)) - addrconf_prefix_route(&ifp->peer_addr, 128, 0, - ifp->idev->dev, 0, 0, - GFP_ATOMIC); + addrconf_prefix_route(&ifp->peer_addr, 128, + ifp->rt_priority, ifp->idev->dev, + 0, 0, GFP_ATOMIC); break; case RTM_DELADDR: if (ifp->idev->cnf.forwarding) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 871d6e52ec67..45e2adc56610 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -259,7 +259,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) case NEXTHDR_DEST: if (dir == XFRM_POLICY_OUT) ipv6_rearrange_destopt(iph, exthdr.opth); - /* fall through */ + fallthrough; case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { net_dbg_ratelimited("overrun %sopts\n", diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index ab5add0fe6b4..bcb9f5e62808 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -97,7 +97,7 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff, */ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) break; - /* fall through */ + fallthrough; case 2: /* send ICMP PARM PROB regardless and drop packet */ icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff); return false; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ef408a5090a2..2688f3e82165 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -898,7 +898,7 @@ static int icmpv6_rcv(struct sk_buff *skb) hdr = icmp6_hdr(skb); /* to notify */ - /* fall through */ + fallthrough; case ICMPV6_DEST_UNREACH: case ICMPV6_TIME_EXCEED: case ICMPV6_PARAMPROB: diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 72abf892302f..46ed56719476 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2068,8 +2068,8 @@ static int fib6_walk_continue(struct fib6_walker *w) continue; } w->state = FWS_L; + fallthrough; #endif - /* fall through */ case FWS_L: left = rcu_dereference_protected(fn->left, 1); if (left) { @@ -2078,7 +2078,7 @@ static int fib6_walk_continue(struct fib6_walker *w) continue; } w->state = FWS_R; - /* fall through */ + fallthrough; case FWS_R: right = rcu_dereference_protected(fn->right, 1); if (right) { @@ -2088,7 +2088,7 @@ static int fib6_walk_continue(struct fib6_walker *w) } w->state = FWS_C; w->leaf = rcu_dereference_protected(fn->leaf, 1); - /* fall through */ + fallthrough; case FWS_C: if (w->leaf && fn->fn_flags & RTN_RTINFO) { int err; @@ -2107,7 +2107,7 @@ static int fib6_walk_continue(struct fib6_walker *w) } skip: w->state = FWS_U; - /* fall through */ + fallthrough; case FWS_U: if (fn == w->root) return 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d6483926f449..65a54d74acc1 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1691,7 +1691,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns case MRT6_ADD_MFC: case MRT6_DEL_MFC: parent = -1; - /* fall through */ + fallthrough; case MRT6_ADD_MFC_PROXY: case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 53caf59c591e..4a3feccd5b10 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1782,7 +1782,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); fib6_run_gc(0, net, false); - /* fallthrough */ + fallthrough; case NETDEV_UP: idev = in6_dev_get(dev); if (!idev) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c973ace208c5..e27393498ecb 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1227,7 +1227,7 @@ struct compat_ip6t_replace { u32 underflow[NF_INET_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; /* struct xt_counters * */ - struct compat_ip6t_entry entries[0]; + struct compat_ip6t_entry entries[]; }; static int @@ -1571,7 +1571,7 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, struct compat_ip6t_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; - struct compat_ip6t_entry entrytable[0]; + struct compat_ip6t_entry entrytable[]; }; static int diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 22b80db6d882..da64550a5707 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -248,7 +248,7 @@ static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m, /* Max length: 17 "POINTER=ffffffff " */ nf_log_buf_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer)); - /* Fall through */ + fallthrough; case ICMPV6_DEST_UNREACH: case ICMPV6_PKT_TOOBIG: case ICMPV6_TIME_EXCEED: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dfe5e603ffe1..0028aa1d7869 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1076,7 +1076,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; - /* fall through */ + fallthrough; default: return ipv6_setsockopt(sk, level, optname, optval, optlen); } @@ -1099,7 +1099,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; - /* fall through */ + fallthrough; default: return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -1161,7 +1161,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; - /* fall through */ + fallthrough; default: return ipv6_getsockopt(sk, level, optname, optval, optlen); } @@ -1184,7 +1184,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; - /* fall through */ + fallthrough; default: return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2931224b674e..afcde55d537c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1062,8 +1062,6 @@ static unsigned short fib6_info_dst_flags(struct fib6_info *rt) flags |= DST_NOCOUNT; if (rt->dst_nopolicy) flags |= DST_NOPOLICY; - if (rt->dst_host) - flags |= DST_HOST; return flags; } @@ -1349,7 +1347,6 @@ static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res, ip6_rt_copy_init(rt, res); rt->rt6i_flags |= RTF_CACHE; - rt->dst.flags |= DST_HOST; rt->rt6i_dst.addr = *daddr; rt->rt6i_dst.plen = 128; @@ -3142,7 +3139,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, goto out; } - rt->dst.flags |= DST_HOST; rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_gateway = fl6->daddr; @@ -3645,8 +3641,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->fib6_dst.plen = cfg->fc_dst_len; - if (rt->fib6_dst.plen == 128) - rt->dst_host = true; #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -4370,7 +4364,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); break; } - /* FALLTHROUGH */ + fallthrough; case IPSTATS_MIB_OUTNOROUTES: IP6_INC_STATS(net, idev, ipstats_mib_noroutes); break; diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index d8afe7290de8..ac837afb9040 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -268,7 +268,7 @@ static int seg6_do_srh(struct sk_buff *skb) skb_mac_header_rebuild(skb); skb_push(skb, skb->mac_len); - err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE); + err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET); if (err) return err; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 7cbc19731997..8165802d8e05 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -282,7 +282,7 @@ static int input_action_end_dx2(struct sk_buff *skb, struct net_device *odev; struct ethhdr *eth; - if (!decap_and_validate(skb, NEXTHDR_NONE)) + if (!decap_and_validate(skb, IPPROTO_ETHERNET)) goto drop; if (!pskb_may_pull(skb, ETH_HLEN)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index eaf09e6b7844..413b3425ac66 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1742,7 +1742,7 @@ do_time_wait: } } /* to ACK */ - /* fall through */ + fallthrough; case TCP_TW_ACK: tcp_v6_timewait_ack(sk, skb); break; diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 370da2f80e3c..25c1007f1098 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -261,7 +261,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) aggregate_strp_stats(&knet->aggregate_strp_stats, &strp_stats); - list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) { + list_for_each_entry(mux, &knet->mux_list, kcm_mux_list) { spin_lock_bh(&mux->lock); aggregate_mux_stats(&mux->stats, &mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7b654d2b8bb2..0f72813fed53 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -5,8 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2019 Intel Corporation - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #include <linux/ieee80211.h> @@ -1012,8 +1011,15 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, prev_beacon_int = sdata->vif.bss_conf.beacon_int; sdata->vif.bss_conf.beacon_int = params->beacon_interval; - if (params->he_cap) + if (params->he_cap && params->he_oper) { sdata->vif.bss_conf.he_support = true; + sdata->vif.bss_conf.htc_trig_based_pkt_ext = + le32_get_bits(params->he_oper->he_oper_params, + IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK); + sdata->vif.bss_conf.frame_time_rts_th = + le32_get_bits(params->he_oper->he_oper_params, + IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK); + } mutex_lock(&local->mtx); err = ieee80211_vif_use_channel(sdata, ¶ms->chandef, @@ -1034,6 +1040,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->control_port_no_encrypt = params->crypto.control_port_no_encrypt; sdata->control_port_over_nl80211 = params->crypto.control_port_over_nl80211; + sdata->control_port_no_preauth = + params->crypto.control_port_no_preauth; sdata->encrypt_headroom = ieee80211_cs_headroom(sdata->local, ¶ms->crypto, sdata->vif.type); @@ -1045,6 +1053,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, params->crypto.control_port_no_encrypt; vlan->control_port_over_nl80211 = params->crypto.control_port_over_nl80211; + vlan->control_port_no_preauth = + params->crypto.control_port_no_preauth; vlan->encrypt_headroom = ieee80211_cs_headroom(sdata->local, ¶ms->crypto, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index de39f9ca9935..f8ed4f621f7f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -912,6 +912,7 @@ struct ieee80211_sub_if_data { u16 sequence_number; __be16 control_port_protocol; bool control_port_no_encrypt; + bool control_port_no_preauth; bool control_port_over_nl80211; int encrypt_headroom; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 128b3468d13e..d069825705d6 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -519,6 +519,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) master->control_port_no_encrypt; sdata->control_port_over_nl80211 = master->control_port_over_nl80211; + sdata->control_port_no_preauth = + master->control_port_no_preauth; sdata->vif.cab_queue = master->vif.cab_queue; memcpy(sdata->vif.hw_queue, master->vif.hw_queue, sizeof(sdata->vif.hw_queue)); @@ -1463,6 +1465,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE); sdata->control_port_no_encrypt = false; + sdata->control_port_over_nl80211 = false; + sdata->control_port_no_preauth = false; sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; sdata->vif.bss_conf.idle = true; sdata->vif.bss_conf.txpower = INT_MIN; /* unset */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 944e86da5c65..8345926193de 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -576,7 +576,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, WIPHY_FLAG_REPORTS_OBSS | WIPHY_FLAG_OFFCHAN_TX; - if (ops->remain_on_channel) + if (!use_chanctx || ops->remain_on_channel) wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | @@ -589,6 +589,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211); + wiphy_ext_feature_set(wiphy, + NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH); if (!ops->hw_scan) { wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | @@ -1081,6 +1083,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) NL80211_EXT_FEATURE_EXT_KEY_ID); } + if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_ADHOC)) + wiphy_ext_feature_set(local->hw.wiphy, + NL80211_EXT_FEATURE_DEL_IBSS_STA); + /* * Calculate scan IE length -- we need this to alloc * memory and to subtract from the driver limit. It diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index d69983370381..38a0383dfbcf 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1152,7 +1152,8 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, } } - if (!(mpath->flags & MESH_PATH_RESOLVING)) + if (!(mpath->flags & MESH_PATH_RESOLVING) && + mesh_path_sel_is_hwmp(sdata)) mesh_queue_preq(mpath, PREQ_Q_F_START); if (skb_queue_len(&mpath->frame_queue) >= MESH_FRAME_QUEUE_LEN) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8a2f2fa21916..16d75da0996a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5458,6 +5458,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, sdata->control_port_no_encrypt = req->crypto.control_port_no_encrypt; sdata->control_port_over_nl80211 = req->crypto.control_port_over_nl80211; + sdata->control_port_no_preauth = req->crypto.control_port_no_preauth; sdata->encrypt_headroom = ieee80211_cs_headroom(local, &req->crypto, sdata->vif.type); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2ffb4ee467e1..91a13aee4378 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2497,7 +2497,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, struct net_device *dev = sdata->dev; if (unlikely((skb->protocol == sdata->control_port_protocol || - skb->protocol == cpu_to_be16(ETH_P_PREAUTH)) && + (skb->protocol == cpu_to_be16(ETH_P_PREAUTH) && + !sdata->control_port_no_preauth)) && sdata->control_port_over_nl80211)) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); bool noencrypt = !(status->flag & RX_FLAG_DECRYPTED); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f357156b86ee..8989a94cfe3f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2150,19 +2150,41 @@ static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) return 0; } +static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats, + int tid) +{ + unsigned int start; + u64 value; + + do { + start = u64_stats_fetch_begin(&rxstats->syncp); + value = rxstats->msdu[tid]; + } while (u64_stats_fetch_retry(&rxstats->syncp, start)); + + return value; +} + static void sta_set_tidstats(struct sta_info *sta, struct cfg80211_tid_stats *tidstats, int tid) { struct ieee80211_local *local = sta->local; + int cpu; if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { - unsigned int start; + if (!ieee80211_hw_check(&local->hw, USES_RSS)) + tidstats->rx_msdu += + sta_get_tidstats_msdu(&sta->rx_stats, tid); - do { - start = u64_stats_fetch_begin(&sta->rx_stats.syncp); - tidstats->rx_msdu = sta->rx_stats.msdu[tid]; - } while (u64_stats_fetch_retry(&sta->rx_stats.syncp, start)); + if (sta->pcpu_rx_stats) { + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpurxs; + + cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + tidstats->rx_msdu += + sta_get_tidstats_msdu(cpurxs, tid); + } + } tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); } @@ -2266,7 +2288,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) | BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) { - sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats); + if (!ieee80211_hw_check(&local->hw, USES_RSS)) + sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats); if (sta->pcpu_rx_stats) { for_each_possible_cpu(cpu) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 83147385c200..49d35936cc9d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4670,6 +4670,7 @@ static int ieee80211_beacon_protect(struct sk_buff *skb, { ieee80211_tx_result res; struct ieee80211_tx_data tx; + struct sk_buff *check_skb; memset(&tx, 0, sizeof(tx)); tx.key = rcu_dereference(sdata->default_beacon_key); @@ -4680,8 +4681,11 @@ static int ieee80211_beacon_protect(struct sk_buff *skb, __skb_queue_head_init(&tx.skbs); __skb_queue_tail(&tx.skbs, skb); res = ieee80211_tx_h_encrypt(&tx); + check_skb = __skb_dequeue(&tx.skbs); + /* we may crash after this, but it'd be a bug in crypto */ + WARN_ON(check_skb != skb); if (WARN_ON_ONCE(res != TX_CONTINUE)) - return -1; + return -EINVAL; return 0; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ea7d277a8c45..20436c86b9bf 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -912,8 +912,12 @@ static void ieee80211_parse_extension_element(u32 *crc, break; case WLAN_EID_EXT_HE_OPERATION: if (len >= sizeof(*elems->he_operation) && - len == ieee80211_he_oper_size(data) - 1) + len == ieee80211_he_oper_size(data) - 1) { + if (crc) + *crc = crc32_be(*crc, (void *)elem, + elem->datalen + 2); elems->he_operation = data; + } break; case WLAN_EID_EXT_UORA: if (len == 1) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index b9a8305bd934..aea1a62d9999 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -259,11 +259,11 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, struct mptcp_ext *mpext; unsigned int data_len; - pr_debug("subflow=%p fourth_ack=%d seq=%x:%x remaining=%d", subflow, - subflow->fourth_ack, subflow->snd_isn, + pr_debug("subflow=%p fully established=%d seq=%x:%x remaining=%d", + subflow, subflow->fully_established, subflow->snd_isn, skb ? TCP_SKB_CB(skb)->seq : 0, remaining); - if (subflow->mp_capable && !subflow->fourth_ack && skb && + if (subflow->mp_capable && !subflow->fully_established && skb && subflow->snd_isn == TCP_SKB_CB(skb)->seq) { /* When skb is not available, we better over-estimate the * emitted options len. A full DSS option is longer than @@ -360,9 +360,12 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, ret = true; } + /* passive sockets msk will set the 'can_ack' after accept(), even + * if the first subflow may have the already the remote key handy + */ opts->ext_copy.use_ack = 0; msk = mptcp_sk(subflow->conn); - if (!msk || !READ_ONCE(msk->can_ack)) { + if (!READ_ONCE(msk->can_ack)) { *size = ALIGN(dss_size, 4); return ret; } @@ -424,19 +427,19 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, return false; } -static bool check_fourth_ack(struct mptcp_subflow_context *subflow, - struct sk_buff *skb, - struct mptcp_options_received *mp_opt) +static bool check_fully_established(struct mptcp_subflow_context *subflow, + struct sk_buff *skb, + struct mptcp_options_received *mp_opt) { /* here we can process OoO, in-window pkts, only in-sequence 4th ack * are relevant */ - if (likely(subflow->fourth_ack || + if (likely(subflow->fully_established || TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)) return true; if (mp_opt->use_ack) - subflow->fourth_ack = 1; + subflow->fully_established = 1; if (subflow->can_ack) return true; @@ -462,7 +465,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, struct mptcp_ext *mpext; mp_opt = &opt_rx->mptcp; - if (!check_fourth_ack(subflow, skb, mp_opt)) + if (!check_fully_established(subflow, skb, mp_opt)) return; if (!mp_opt->dss) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c0cef07f4382..92d5382e71f4 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -820,9 +820,12 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) } #endif -static struct sock *mptcp_sk_clone_lock(const struct sock *sk) +struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) { + struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); + struct mptcp_sock *msk; + u64 ack_seq; if (!nsk) return NULL; @@ -832,6 +835,39 @@ static struct sock *mptcp_sk_clone_lock(const struct sock *sk) inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); #endif + __mptcp_init_sock(nsk); + + msk = mptcp_sk(nsk); + msk->local_key = subflow_req->local_key; + msk->token = subflow_req->token; + msk->subflow = NULL; + + if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) { + bh_unlock_sock(nsk); + + /* we can't call into mptcp_close() here - possible BH context + * free the sock directly + */ + nsk->sk_prot->destroy(nsk); + sk_free(nsk); + return NULL; + } + + msk->write_seq = subflow_req->idsn + 1; + if (subflow_req->remote_key_valid) { + msk->can_ack = true; + msk->remote_key = subflow_req->remote_key; + mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); + ack_seq++; + msk->ack_seq = ack_seq; + } + + /* will be fully established after successful MPC subflow creation */ + inet_sk_state_store(nsk, TCP_SYN_RECV); + bh_unlock_sock(nsk); + + /* keep a single reference */ + __sock_put(nsk); return nsk; } @@ -859,62 +895,32 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, struct mptcp_subflow_context *subflow; struct sock *new_mptcp_sock; struct sock *ssk = newsk; - u64 ack_seq; subflow = mptcp_subflow_ctx(newsk); - lock_sock(sk); + new_mptcp_sock = subflow->conn; - local_bh_disable(); - new_mptcp_sock = mptcp_sk_clone_lock(sk); - if (!new_mptcp_sock) { - *err = -ENOBUFS; - local_bh_enable(); - release_sock(sk); - mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1, 0, 0); - tcp_close(newsk, 0); - return NULL; + /* is_mptcp should be false if subflow->conn is missing, see + * subflow_syn_recv_sock() + */ + if (WARN_ON_ONCE(!new_mptcp_sock)) { + tcp_sk(newsk)->is_mptcp = 0; + return newsk; } - __mptcp_init_sock(new_mptcp_sock); + /* acquire the 2nd reference for the owning socket */ + sock_hold(new_mptcp_sock); + local_bh_disable(); + bh_lock_sock(new_mptcp_sock); msk = mptcp_sk(new_mptcp_sock); - msk->local_key = subflow->local_key; - msk->token = subflow->token; - msk->subflow = NULL; msk->first = newsk; - mptcp_token_update_accept(newsk, new_mptcp_sock); - - msk->write_seq = subflow->idsn + 1; - if (subflow->can_ack) { - msk->can_ack = true; - msk->remote_key = subflow->remote_key; - mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); - ack_seq++; - msk->ack_seq = ack_seq; - } newsk = new_mptcp_sock; mptcp_copy_inaddrs(newsk, ssk); list_add(&subflow->node, &msk->conn_list); - /* will be fully established at mptcp_stream_accept() - * completion. - */ - inet_sk_state_store(new_mptcp_sock, TCP_SYN_RECV); bh_unlock_sock(new_mptcp_sock); local_bh_enable(); - release_sock(sk); - - /* the subflow can already receive packet, avoid racing with - * the receive path and process the pending ones - */ - lock_sock(ssk); - subflow->rel_write_seq = 1; - subflow->tcp_sock = ssk; - subflow->conn = new_mptcp_sock; - if (unlikely(!skb_queue_empty(&ssk->sk_receive_queue))) - mptcp_subflow_data_available(ssk); - release_sock(ssk); } return newsk; @@ -1049,10 +1055,6 @@ void mptcp_finish_connect(struct sock *ssk) WRITE_ONCE(msk->write_seq, subflow->idsn + 1); WRITE_ONCE(msk->ack_seq, ack_seq); WRITE_ONCE(msk->can_ack, 1); - if (inet_sk_state_load(sk) != TCP_ESTABLISHED) { - inet_sk_state_store(sk, TCP_ESTABLISHED); - sk->sk_state_change(sk); - } } static void mptcp_sock_graft(struct sock *sk, struct socket *parent) @@ -1249,8 +1251,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk->sk_socket) mptcp_sock_graft(ssk, newsock); } - - inet_sk_state_store(newsock->sk, TCP_ESTABLISHED); } sock_put(ssock->sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 313558fa8185..eb3f65264a40 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -119,7 +119,7 @@ struct mptcp_subflow_context { u32 map_data_len; u32 request_mptcp : 1, /* send MP_CAPABLE */ mp_capable : 1, /* remote is MPTCP capable */ - fourth_ack : 1, /* send initial DSS */ + fully_established : 1, /* path validated */ conn_finished : 1, map_valid : 1, mpc_map : 1, @@ -193,6 +193,7 @@ void mptcp_proto_init(void); int mptcp_proto_v6_init(void); #endif +struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx); @@ -202,8 +203,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk); int mptcp_token_new_request(struct request_sock *req); void mptcp_token_destroy_request(u32 token); int mptcp_token_new_connect(struct sock *sk); -int mptcp_token_new_accept(u32 token); -void mptcp_token_update_accept(struct sock *sk, struct sock *conn); +int mptcp_token_new_accept(u32 token, struct sock *conn); void mptcp_token_destroy(u32 token); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 0de2a44bdaa0..5bae12da2769 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -109,10 +109,16 @@ static void subflow_v6_init_req(struct request_sock *req, static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct sock *parent = subflow->conn; subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); - if (subflow->conn && !subflow->conn_finished) { + if (inet_sk_state_load(parent) != TCP_ESTABLISHED) { + inet_sk_state_store(parent, TCP_ESTABLISHED); + parent->sk_state_change(parent); + } + + if (!subflow->conn_finished) { pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), subflow->remote_key); mptcp_finish_connect(sk); @@ -182,6 +188,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; struct tcp_options_received opt_rx; + struct sock *new_msk = NULL; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -197,7 +204,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * out-of-order pkt, which will not carry the MP_CAPABLE * opt even on mptcp enabled paths */ - goto create_child; + goto create_msk; } opt_rx.mptcp.mp_capable = 0; @@ -207,7 +214,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, subflow_req->remote_key_valid = 1; } else { subflow_req->mp_capable = 0; + goto create_child; } + +create_msk: + new_msk = mptcp_sk_clone(listener->conn, req); + if (!new_msk) + subflow_req->mp_capable = 0; } create_child: @@ -221,22 +234,24 @@ create_child: * handshake */ if (!ctx) - return child; + goto out; if (ctx->mp_capable) { - if (mptcp_token_new_accept(ctx->token)) - goto close_child; + /* new mpc subflow takes ownership of the newly + * created mptcp socket + */ + inet_sk_state_store((struct sock *)new_msk, + TCP_ESTABLISHED); + ctx->conn = new_msk; + new_msk = NULL; } } +out: + /* dispose of the left over mptcp master, if any */ + if (unlikely(new_msk)) + sock_put(new_msk); return child; - -close_child: - pr_debug("closing child socket"); - tcp_send_active_reset(child, GFP_ATOMIC); - inet_csk_prepare_forced_close(child); - tcp_done(child); - return NULL; } static struct inet_connection_sock_af_ops subflow_specific; @@ -432,9 +447,6 @@ static bool subflow_check_data_avail(struct sock *ssk) if (subflow->data_avail) return true; - if (!subflow->conn) - return false; - msk = mptcp_sk(subflow->conn); for (;;) { u32 map_remaining; @@ -554,11 +566,10 @@ static void subflow_data_ready(struct sock *sk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; - if (!parent || !subflow->mp_capable) { + if (!subflow->mp_capable) { subflow->tcp_data_ready(sk); - if (parent) - parent->sk_data_ready(parent); + parent->sk_data_ready(parent); return; } @@ -572,7 +583,7 @@ static void subflow_write_space(struct sock *sk) struct sock *parent = subflow->conn; sk_stream_write_space(sk); - if (parent && sk_stream_is_writeable(sk)) { + if (sk_stream_is_writeable(sk)) { set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags); smp_mb__after_atomic(); /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */ @@ -687,7 +698,7 @@ static bool subflow_is_done(const struct sock *sk) static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct sock *parent = READ_ONCE(subflow->conn); + struct sock *parent = subflow->conn; __subflow_state_change(sk); @@ -695,10 +706,10 @@ static void subflow_state_change(struct sock *sk) * a fin packet carrying a DSS can be unnoticed if we don't trigger * the data available machinery here. */ - if (parent && subflow->mp_capable && mptcp_subflow_data_available(sk)) + if (subflow->mp_capable && mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); - if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) && + if (!(parent->sk_shutdown & RCV_SHUTDOWN) && !subflow->rx_eof && subflow_is_done(sk)) { subflow->rx_eof = 1; parent->sk_shutdown |= RCV_SHUTDOWN; @@ -793,8 +804,11 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->tcp_data_ready = old_ctx->tcp_data_ready; new_ctx->tcp_state_change = old_ctx->tcp_state_change; new_ctx->tcp_write_space = old_ctx->tcp_write_space; + new_ctx->rel_write_seq = 1; + new_ctx->tcp_sock = newsk; + new_ctx->mp_capable = 1; - new_ctx->fourth_ack = subflow_req->remote_key_valid; + new_ctx->fully_established = subflow_req->remote_key_valid; new_ctx->can_ack = subflow_req->remote_key_valid; new_ctx->remote_key = subflow_req->remote_key; new_ctx->local_key = subflow_req->local_key; diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 84d887806090..b71b53c0ac8d 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -128,46 +128,19 @@ int mptcp_token_new_connect(struct sock *sk) * * Called when a SYN packet creates a new logical connection, i.e. * is not a join request. - * - * We don't have an mptcp socket yet at that point. - * This is paired with mptcp_token_update_accept, called on accept(). */ -int mptcp_token_new_accept(u32 token) +int mptcp_token_new_accept(u32 token, struct sock *conn) { int err; spin_lock_bh(&token_tree_lock); - err = radix_tree_insert(&token_tree, token, &token_used); + err = radix_tree_insert(&token_tree, token, conn); spin_unlock_bh(&token_tree_lock); return err; } /** - * mptcp_token_update_accept - update token to map to mptcp socket - * @conn: the new struct mptcp_sock - * @sk: the initial subflow for this mptcp socket - * - * Called when the first mptcp socket is created on accept to - * refresh the dummy mapping (done to reserve the token) with - * the mptcp_socket structure that wasn't allocated before. - */ -void mptcp_token_update_accept(struct sock *sk, struct sock *conn) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - void __rcu **slot; - - spin_lock_bh(&token_tree_lock); - slot = radix_tree_lookup_slot(&token_tree, subflow->token); - WARN_ON_ONCE(!slot); - if (slot) { - WARN_ON_ONCE(rcu_access_pointer(*slot) != &token_used); - radix_tree_replace_slot(&token_tree, slot, conn); - } - spin_unlock_bh(&token_tree_lock); -} - -/** * mptcp_token_destroy_request - remove mptcp connection/token * @token - token of mptcp connection to remove * diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 91efae88e8c2..468fea1aebba 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -455,14 +455,6 @@ config NF_TABLES To compile it as a module, choose M here. if NF_TABLES - -config NF_TABLES_SET - tristate "Netfilter nf_tables set infrastructure" - help - This option enables the nf_tables set infrastructure that allows to - look up for elements in a set and to build one-way mappings between - matchings and actions. - config NF_TABLES_INET depends on IPV6 select NF_TABLES_IPV4 diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 3f572e5a975e..292e71dc7ba4 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -78,14 +78,17 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \ - nft_chain_route.o nf_tables_offload.o + nft_chain_route.o nf_tables_offload.o \ + nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \ + nft_set_pipapo.o -nf_tables_set-objs := nf_tables_set_core.o \ - nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \ - nft_set_pipapo.o +ifdef CONFIG_X86_64 +ifneq (,$(findstring -DCONFIG_AS_AVX2=1,$(KBUILD_CFLAGS))) +nf_tables-objs += nft_set_pipapo_avx2.o +endif +endif obj-$(CONFIG_NF_TABLES) += nf_tables.o -obj-$(CONFIG_NF_TABLES_SET) += nf_tables_set.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 0a2196f59106..486959f70cf3 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -46,7 +46,7 @@ struct bitmap_ip { u8 netmask; /* subnet netmask */ struct timer_list gc; /* garbage collection */ struct ip_set *set; /* attached to this ip_set */ - unsigned char extensions[0] /* data extensions */ + unsigned char extensions[] /* data extensions */ __aligned(__alignof__(u64)); }; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 739e343efaf6..2310a316e0af 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -49,7 +49,7 @@ struct bitmap_ipmac { size_t memsize; /* members size */ struct timer_list gc; /* garbage collector */ struct ip_set *set; /* attached to this ip_set */ - unsigned char extensions[0] /* MAC + data extensions */ + unsigned char extensions[] /* MAC + data extensions */ __aligned(__alignof__(u64)); }; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index b49978dd810d..e56ced66f202 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -37,7 +37,7 @@ struct bitmap_port { size_t memsize; /* members size */ struct timer_list gc; /* garbage collection */ struct ip_set *set; /* attached to this ip_set */ - unsigned char extensions[0] /* data extensions */ + unsigned char extensions[] /* data extensions */ __aligned(__alignof__(u64)); }; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e52d7b7597a0..1ee43752d6d3 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -76,7 +76,7 @@ struct hbucket { DECLARE_BITMAP(used, AHASH_MAX_TUNED); u8 size; /* size of the array */ u8 pos; /* position of the first free entry */ - unsigned char value[0] /* the array of the values */ + unsigned char value[] /* the array of the values */ __aligned(__alignof__(u64)); }; @@ -109,7 +109,7 @@ struct htable { u8 htable_bits; /* size of hash table == 2^htable_bits */ u32 maxelem; /* Maxelem per region */ struct ip_set_region *hregion; /* Region locks and ext sizes */ - struct hbucket __rcu *bucket[0]; /* hashtable buckets */ + struct hbucket __rcu *bucket[]; /* hashtable buckets */ }; #define hbucket(h, i) ((h)->bucket[i]) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1927fc296f95..a18f8fe728e3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2633,7 +2633,6 @@ void nf_conntrack_init_end(void) */ #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) #define DYING_NULLS_VAL ((1<<30)+1) -#define TEMPLATE_NULLS_VAL ((1<<30)+2) int nf_conntrack_init_net(struct net *net) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 410809c669e1..9b57330c81f8 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -411,7 +411,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) *pos = cpu + 1; return per_cpu_ptr(net->ct.stat, cpu); } - + (*pos)++; return NULL; } @@ -1054,21 +1054,18 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) nf_conntrack_standalone_init_dccp_sysctl(net, table); nf_conntrack_standalone_init_gre_sysctl(net, table); - /* Don't export sysctls to unprivileged users */ + /* Don't allow unprivileged users to alter certain sysctls */ if (net->user_ns != &init_user_ns) { - table[NF_SYSCTL_CT_MAX].procname = NULL; - table[NF_SYSCTL_CT_ACCT].procname = NULL; - table[NF_SYSCTL_CT_HELPER].procname = NULL; -#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP - table[NF_SYSCTL_CT_TIMESTAMP].procname = NULL; -#endif + table[NF_SYSCTL_CT_MAX].mode = 0444; + table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444; + table[NF_SYSCTL_CT_HELPER].mode = 0444; #ifdef CONFIG_NF_CONNTRACK_EVENTS - table[NF_SYSCTL_CT_EVENTS].procname = NULL; + table[NF_SYSCTL_CT_EVENTS].mode = 0444; #endif - } - - if (!net_eq(&init_net, net)) table[NF_SYSCTL_CT_BUCKETS].mode = 0444; + } else if (!net_eq(&init_net, net)) { + table[NF_SYSCTL_CT_BUCKETS].mode = 0444; + } net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.sysctl_header) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 8af28e10b4e6..9a477bd563b7 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -252,6 +252,19 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) } EXPORT_SYMBOL_GPL(flow_offload_add); +void flow_offload_refresh(struct nf_flowtable *flow_table, + struct flow_offload *flow) +{ + flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT; + + if (likely(!nf_flowtable_hw_offload(flow_table) || + !test_and_clear_bit(NF_FLOW_HW_REFRESH, &flow->flags))) + return; + + nf_flow_offload_add(flow_table, flow); +} +EXPORT_SYMBOL_GPL(flow_offload_refresh); + static inline bool nf_flow_has_expired(const struct flow_offload *flow) { return nf_flow_timeout_delta(flow->timeout) <= 0; @@ -372,6 +385,50 @@ static void nf_flow_offload_work_gc(struct work_struct *work) queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } +int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct flow_block *block = &flow_table->flow_block; + struct flow_block_cb *block_cb; + int err = 0; + + mutex_lock(&flow_table->flow_block_lock); + block_cb = flow_block_cb_lookup(block, cb, cb_priv); + if (block_cb) { + err = -EEXIST; + goto unlock; + } + + block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL); + if (IS_ERR(block_cb)) { + err = PTR_ERR(block_cb); + goto unlock; + } + + list_add_tail(&block_cb->list, &block->cb_list); + +unlock: + mutex_unlock(&flow_table->flow_block_lock); + return err; +} +EXPORT_SYMBOL_GPL(nf_flow_table_offload_add_cb); + +void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct flow_block *block = &flow_table->flow_block; + struct flow_block_cb *block_cb; + + mutex_lock(&flow_table->flow_block_lock); + block_cb = flow_block_cb_lookup(block, cb, cb_priv); + if (block_cb) + list_del(&block_cb->list); + else + WARN_ON(true); + mutex_unlock(&flow_table->flow_block_lock); +} +EXPORT_SYMBOL_GPL(nf_flow_table_offload_del_cb); + static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, __be16 port, __be16 new_port) { @@ -494,6 +551,7 @@ int nf_flow_table_init(struct nf_flowtable *flowtable) INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); flow_block_init(&flowtable->flow_block); + mutex_init(&flowtable->flow_block_lock); err = rhashtable_init(&flowtable->rhashtable, &nf_flow_offload_rhash_params); @@ -550,11 +608,13 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) mutex_lock(&flowtable_lock); list_del(&flow_table->list); mutex_unlock(&flowtable_lock); + cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); nf_flow_table_offload_flush(flow_table); rhashtable_destroy(&flow_table->rhashtable); + mutex_destroy(&flow_table->flow_block_lock); } EXPORT_SYMBOL_GPL(nf_flow_table_free); diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 9e563fd3da0f..5272721080f8 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -232,13 +232,6 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, return NF_STOLEN; } -static bool nf_flow_offload_refresh(struct nf_flowtable *flow_table, - struct flow_offload *flow) -{ - return nf_flowtable_hw_offload(flow_table) && - test_and_clear_bit(NF_FLOW_HW_REFRESH, &flow->flags); -} - unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -279,8 +272,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff)) return NF_ACCEPT; - if (unlikely(nf_flow_offload_refresh(flow_table, flow))) - nf_flow_offload_add(flow_table, flow); + flow_offload_refresh(flow_table, flow); if (nf_flow_offload_dst_check(&rt->dst)) { flow_offload_teardown(flow); @@ -290,7 +282,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0) return NF_DROP; - flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT; iph = ip_hdr(skb); ip_decrease_ttl(iph); skb->tstamp = 0; @@ -508,8 +499,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, sizeof(*ip6h))) return NF_ACCEPT; - if (unlikely(nf_flow_offload_refresh(flow_table, flow))) - nf_flow_offload_add(flow_table, flow); + flow_offload_refresh(flow_table, flow); if (nf_flow_offload_dst_check(&rt->dst)) { flow_offload_teardown(flow); @@ -522,7 +512,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (nf_flow_nat_ipv6(flow, skb, dir) < 0) return NF_DROP; - flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT; ip6h = ipv6_hdr(skb); ip6h->hop_limit--; skb->tstamp = 0; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 06f00cdc3891..ad549317af30 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -7,6 +7,7 @@ #include <linux/tc_act/tc_csum.h> #include <net/flow_offload.h> #include <net/netfilter/nf_flow_table.h> +#include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_tuple.h> @@ -23,38 +24,65 @@ struct flow_offload_work { struct flow_offload *flow; }; -struct nf_flow_key { - struct flow_dissector_key_meta meta; - struct flow_dissector_key_control control; - struct flow_dissector_key_basic basic; - union { - struct flow_dissector_key_ipv4_addrs ipv4; - struct flow_dissector_key_ipv6_addrs ipv6; - }; - struct flow_dissector_key_tcp tcp; - struct flow_dissector_key_ports tp; -} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ - -struct nf_flow_match { - struct flow_dissector dissector; - struct nf_flow_key key; - struct nf_flow_key mask; -}; - -struct nf_flow_rule { - struct nf_flow_match match; - struct flow_rule *rule; -}; - #define NF_FLOW_DISSECTOR(__match, __type, __field) \ (__match)->dissector.offset[__type] = \ offsetof(struct nf_flow_key, __field) +static void nf_flow_rule_lwt_match(struct nf_flow_match *match, + struct ip_tunnel_info *tun_info) +{ + struct nf_flow_key *mask = &match->mask; + struct nf_flow_key *key = &match->key; + unsigned int enc_keys; + + if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX)) + return; + + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control); + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id); + key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id); + mask->enc_key_id.keyid = 0xffffffff; + enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL); + + if (ip_tunnel_info_af(tun_info) == AF_INET) { + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + enc_ipv4); + key->enc_ipv4.src = tun_info->key.u.ipv4.dst; + key->enc_ipv4.dst = tun_info->key.u.ipv4.src; + if (key->enc_ipv4.src) + mask->enc_ipv4.src = 0xffffffff; + if (key->enc_ipv4.dst) + mask->enc_ipv4.dst = 0xffffffff; + enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + } else { + memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst, + sizeof(struct in6_addr)); + memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src, + sizeof(struct in6_addr)); + if (memcmp(&key->enc_ipv6.src, &in6addr_any, + sizeof(struct in6_addr))) + memset(&key->enc_ipv6.src, 0xff, + sizeof(struct in6_addr)); + if (memcmp(&key->enc_ipv6.dst, &in6addr_any, + sizeof(struct in6_addr))) + memset(&key->enc_ipv6.dst, 0xff, + sizeof(struct in6_addr)); + enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS); + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + } + + match->dissector.used_keys |= enc_keys; +} + static int nf_flow_rule_match(struct nf_flow_match *match, - const struct flow_offload_tuple *tuple) + const struct flow_offload_tuple *tuple, + struct dst_entry *other_dst) { struct nf_flow_key *mask = &match->mask; struct nf_flow_key *key = &match->key; + struct ip_tunnel_info *tun_info; NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control); @@ -64,6 +92,11 @@ static int nf_flow_rule_match(struct nf_flow_match *match, NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp); + if (other_dst->lwtstate) { + tun_info = lwt_tun_info(other_dst->lwtstate); + nf_flow_rule_lwt_match(match, tun_info); + } + key->meta.ingress_ifindex = tuple->iifidx; mask->meta.ingress_ifindex = 0xffffffff; @@ -442,10 +475,52 @@ static void flow_offload_redirect(const struct flow_offload *flow, dev_hold(rt->dst.dev); } +static void flow_offload_encap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) +{ + struct flow_action_entry *entry; + struct dst_entry *dst; + + dst = flow->tuplehash[dir].tuple.dst_cache; + if (dst->lwtstate) { + struct ip_tunnel_info *tun_info; + + tun_info = lwt_tun_info(dst->lwtstate); + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { + entry = flow_action_entry_next(flow_rule); + entry->id = FLOW_ACTION_TUNNEL_ENCAP; + entry->tunnel = tun_info; + } + } +} + +static void flow_offload_decap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) +{ + struct flow_action_entry *entry; + struct dst_entry *dst; + + dst = flow->tuplehash[!dir].tuple.dst_cache; + if (dst->lwtstate) { + struct ip_tunnel_info *tun_info; + + tun_info = lwt_tun_info(dst->lwtstate); + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { + entry = flow_action_entry_next(flow_rule); + entry->id = FLOW_ACTION_TUNNEL_DECAP; + } + } +} + int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { + flow_offload_decap_tunnel(flow, dir, flow_rule); + flow_offload_encap_tunnel(flow, dir, flow_rule); + if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) return -1; @@ -472,6 +547,9 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { + flow_offload_decap_tunnel(flow, dir, flow_rule); + flow_offload_encap_tunnel(flow, dir, flow_rule); + if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) return -1; @@ -502,6 +580,7 @@ nf_flow_offload_rule_alloc(struct net *net, const struct flow_offload *flow = offload->flow; const struct flow_offload_tuple *tuple; struct nf_flow_rule *flow_rule; + struct dst_entry *other_dst; int err = -ENOMEM; flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL); @@ -517,7 +596,8 @@ nf_flow_offload_rule_alloc(struct net *net, flow_rule->rule->match.key = &flow_rule->match.key; tuple = &flow->tuplehash[dir].tuple; - err = nf_flow_rule_match(&flow_rule->match, tuple); + other_dst = flow->tuplehash[!dir].tuple.dst_cache; + err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst); if (err < 0) goto err_flow_match; @@ -597,6 +677,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, struct nf_flow_rule *flow_rule, enum flow_offload_tuple_dir dir, int priority, int cmd, + struct flow_stats *stats, struct list_head *block_cb_list) { struct flow_cls_offload cls_flow = {}; @@ -610,6 +691,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, if (cmd == FLOW_CLS_REPLACE) cls_flow.rule = flow_rule->rule; + mutex_lock(&flowtable->flow_block_lock); list_for_each_entry(block_cb, block_cb_list, list) { err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); @@ -618,6 +700,10 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, i++; } + mutex_unlock(&flowtable->flow_block_lock); + + if (cmd == FLOW_CLS_STATS) + memcpy(stats, &cls_flow.stats, sizeof(*stats)); return i; } @@ -628,7 +714,7 @@ static int flow_offload_tuple_add(struct flow_offload_work *offload, { return nf_flow_offload_tuple(offload->flowtable, offload->flow, flow_rule, dir, offload->priority, - FLOW_CLS_REPLACE, + FLOW_CLS_REPLACE, NULL, &offload->flowtable->flow_block.cb_list); } @@ -636,7 +722,7 @@ static void flow_offload_tuple_del(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir) { nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, - offload->priority, FLOW_CLS_DESTROY, + offload->priority, FLOW_CLS_DESTROY, NULL, &offload->flowtable->flow_block.cb_list); } @@ -682,19 +768,9 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir, struct flow_stats *stats) { - struct nf_flowtable *flowtable = offload->flowtable; - struct flow_cls_offload cls_flow = {}; - struct flow_block_cb *block_cb; - struct netlink_ext_ack extack; - __be16 proto = ETH_P_ALL; - - nf_flow_offload_init(&cls_flow, proto, offload->priority, - FLOW_CLS_STATS, - &offload->flow->tuplehash[dir].tuple, &extack); - - list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) - block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); - memcpy(stats, &cls_flow.stats, sizeof(*stats)); + nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, + offload->priority, FLOW_CLS_STATS, stats, + &offload->flowtable->flow_block.cb_list); } static void flow_offload_work_stats(struct flow_offload_work *offload) @@ -839,25 +915,47 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, return err; } -static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, - struct nf_flowtable *flowtable, - struct net_device *dev, - enum flow_block_command cmd, - struct netlink_ext_ack *extack) +static void nf_flow_table_block_offload_init(struct flow_block_offload *bo, + struct net *net, + enum flow_block_command cmd, + struct nf_flowtable *flowtable, + struct netlink_ext_ack *extack) { - int err; - - if (!dev->netdev_ops->ndo_setup_tc) - return -EOPNOTSUPP; - memset(bo, 0, sizeof(*bo)); - bo->net = dev_net(dev); + bo->net = net; bo->block = &flowtable->flow_block; bo->command = cmd; bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; INIT_LIST_HEAD(&bo->cb_list); +} + +static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo, + struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd, + struct netlink_ext_ack *extack) +{ + nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, + extack); + flow_indr_block_call(dev, bo, cmd); + if (list_empty(&bo->cb_list)) + return -EOPNOTSUPP; + + return 0; +} + +static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, + struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd, + struct netlink_ext_ack *extack) +{ + int err; + + nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, + extack); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); if (err < 0) return err; @@ -876,7 +974,12 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, if (!nf_flowtable_hw_offload(flowtable)) return 0; - err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack); + if (dev->netdev_ops->ndo_setup_tc) + err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, + &extack); + else + err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd, + &extack); if (err < 0) return err; @@ -884,10 +987,75 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, } EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup); +static void nf_flow_table_indr_block_ing_cmd(struct net_device *dev, + struct nf_flowtable *flowtable, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) +{ + struct netlink_ext_ack extack = {}; + struct flow_block_offload bo; + + if (!flowtable) + return; + + nf_flow_table_block_offload_init(&bo, dev_net(dev), cmd, flowtable, + &extack); + + cb(dev, cb_priv, TC_SETUP_FT, &bo); + + nf_flow_table_block_setup(flowtable, &bo, cmd); +} + +static void nf_flow_table_indr_block_cb_cmd(struct nf_flowtable *flowtable, + struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) +{ + if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD)) + return; + + nf_flow_table_indr_block_ing_cmd(dev, flowtable, cb, cb_priv, cmd); +} + +static void nf_flow_table_indr_block_cb(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) +{ + struct net *net = dev_net(dev); + struct nft_flowtable *nft_ft; + struct nft_table *table; + struct nft_hook *hook; + + mutex_lock(&net->nft.commit_mutex); + list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(nft_ft, &table->flowtables, list) { + list_for_each_entry(hook, &nft_ft->hook_list, list) { + if (hook->ops.dev != dev) + continue; + + nf_flow_table_indr_block_cb_cmd(&nft_ft->data, + dev, cb, + cb_priv, cmd); + } + } + } + mutex_unlock(&net->nft.commit_mutex); +} + +static struct flow_indr_block_entry block_ing_entry = { + .cb = nf_flow_table_indr_block_cb, + .list = LIST_HEAD_INIT(block_ing_entry.list), +}; + int nf_flow_table_offload_init(void) { INIT_WORK(&nf_flow_offload_work, flow_offload_work_handler); + flow_indr_add_block_cb(&block_ing_entry); + return 0; } @@ -896,6 +1064,8 @@ void nf_flow_table_offload_exit(void) struct flow_offload_work *offload, *next; LIST_HEAD(offload_pending_list); + flow_indr_del_block_cb(&block_ing_entry); + cancel_work_sync(&nf_flow_offload_work); list_for_each_entry_safe(offload, next, &offload_pending_list, list) { diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index b0930d4aba22..b9cbe1e2453e 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -267,7 +267,7 @@ static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) *pos = cpu + 1; return per_cpu_ptr(snet->stats, cpu); } - + (*pos)++; return NULL; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d1318bdf49ca..f92fb6003745 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1405,6 +1405,11 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, lockdep_commit_lock_is_held(net)); if (nft_dump_stats(skb, stats)) goto nla_put_failure; + + if ((chain->flags & NFT_CHAIN_HW_OFFLOAD) && + nla_put_be32(skb, NFTA_CHAIN_FLAGS, + htonl(NFT_CHAIN_HW_OFFLOAD))) + goto nla_put_failure; } if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) @@ -2518,8 +2523,8 @@ static void nf_tables_expr_destroy(const struct nft_ctx *ctx, module_put(type->owner); } -struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, - const struct nlattr *nla) +static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, + const struct nlattr *nla) { struct nft_expr_info info; struct nft_expr *expr; @@ -3261,25 +3266,17 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, /* * Sets */ - -static LIST_HEAD(nf_tables_set_types); - -int nft_register_set(struct nft_set_type *type) -{ - nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail_rcu(&type->list, &nf_tables_set_types); - nfnl_unlock(NFNL_SUBSYS_NFTABLES); - return 0; -} -EXPORT_SYMBOL_GPL(nft_register_set); - -void nft_unregister_set(struct nft_set_type *type) -{ - nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del_rcu(&type->list); - nfnl_unlock(NFNL_SUBSYS_NFTABLES); -} -EXPORT_SYMBOL_GPL(nft_unregister_set); +static const struct nft_set_type *nft_set_types[] = { + &nft_set_hash_fast_type, + &nft_set_hash_type, + &nft_set_rhash_type, + &nft_set_bitmap_type, + &nft_set_rbtree_type, +#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2) + &nft_set_pipapo_avx2_type, +#endif + &nft_set_pipapo_type, +}; #define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \ NFT_SET_TIMEOUT | NFT_SET_OBJECT | \ @@ -3305,15 +3302,11 @@ nft_select_set_ops(const struct nft_ctx *ctx, struct nft_set_estimate est, best; const struct nft_set_type *type; u32 flags = 0; + int i; lockdep_assert_held(&ctx->net->nft.commit_mutex); lockdep_nfnl_nft_mutex_not_held(); -#ifdef CONFIG_MODULES - if (list_empty(&nf_tables_set_types)) { - if (nft_request_module(ctx->net, "nft-set") == -EAGAIN) - return ERR_PTR(-EAGAIN); - } -#endif + if (nla[NFTA_SET_FLAGS] != NULL) flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); @@ -3322,7 +3315,8 @@ nft_select_set_ops(const struct nft_ctx *ctx, best.lookup = ~0; best.space = ~0; - list_for_each_entry(type, &nf_tables_set_types, list) { + for (i = 0; i < ARRAY_SIZE(nft_set_types); i++) { + type = nft_set_types[i]; ops = &type->ops; if (!nft_set_ops_candidate(type, flags)) @@ -3353,11 +3347,6 @@ nft_select_set_ops(const struct nft_ctx *ctx, break; } - if (!try_module_get(type->owner)) - continue; - if (bops != NULL) - module_put(to_set_type(bops)->owner); - bops = ops; best = est; } @@ -4056,10 +4045,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, size = ops->privsize(nla, &desc); set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); - if (!set) { - err = -ENOMEM; - goto err1; - } + if (!set) + return -ENOMEM; name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL); if (!name) { @@ -4118,8 +4105,6 @@ err3: kfree(set->name); err2: kvfree(set); -err1: - module_put(to_set_type(ops)->owner); return err; } @@ -4129,7 +4114,6 @@ static void nft_set_destroy(struct nft_set *set) return; set->ops->destroy(set); - module_put(to_set_type(set->ops)->owner); kfree(set->name); kvfree(set); } @@ -4307,7 +4291,6 @@ const struct nft_set_ext_type nft_set_ext_types[] = { .align = __alignof__(u32), }, }; -EXPORT_SYMBOL_GPL(nft_set_ext_types); /* * Set elements @@ -4796,6 +4779,36 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, return trans; } +struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nlattr *attr) +{ + struct nft_expr *expr; + int err; + + expr = nft_expr_init(ctx, attr); + if (IS_ERR(expr)) + return expr; + + err = -EOPNOTSUPP; + if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL)) + goto err_set_elem_expr; + + if (expr->ops->type->flags & NFT_EXPR_GC) { + if (set->flags & NFT_SET_TIMEOUT) + goto err_set_elem_expr; + if (!set->ops->gc_init) + goto err_set_elem_expr; + set->ops->gc_init(set); + } + + return expr; + +err_set_elem_expr: + nft_expr_destroy(ctx, expr); + return ERR_PTR(err); +} + void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *key_end, @@ -4878,6 +4891,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_set_elem elem; struct nft_set_binding *binding; struct nft_object *obj = NULL; + struct nft_expr *expr = NULL; struct nft_userdata *udata; struct nft_data_desc desc; struct nft_data data; @@ -4945,10 +4959,17 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; } + if (nla[NFTA_SET_ELEM_EXPR] != NULL) { + expr = nft_set_elem_expr_alloc(ctx, set, + nla[NFTA_SET_ELEM_EXPR]); + if (IS_ERR(expr)) + return PTR_ERR(expr); + } + err = nft_setelem_parse_key(ctx, set, &elem.key.val, nla[NFTA_SET_ELEM_KEY]); if (err < 0) - return err; + goto err_set_elem_expr; nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); @@ -4967,6 +4988,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); } + if (expr) + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPR, + expr->ops->size); + if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { if (!(set->flags & NFT_SET_OBJECT)) { err = -EINVAL; @@ -5051,6 +5076,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, *nft_set_ext_obj(ext) = obj; obj->use++; } + if (expr) { + memcpy(nft_set_ext_expr(ext), expr, expr->ops->size); + kfree(expr); + } trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) @@ -5106,6 +5135,9 @@ err_parse_key_end: nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); err_parse_key: nft_data_release(&elem.key.val, NFT_DATA_VALUE); +err_set_elem_expr: + if (expr != NULL) + nft_expr_destroy(ctx, expr); return err; } @@ -5360,7 +5392,6 @@ void nft_set_gc_batch_release(struct rcu_head *rcu) nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); kfree(gcb); } -EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, gfp_t gfp) @@ -5373,7 +5404,6 @@ struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, gcb->head.set = set; return gcb; } -EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); /* * Stateful objects @@ -6300,8 +6330,13 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, goto err4; err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable); - if (err < 0) + if (err < 0) { + list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { + list_del_rcu(&hook->list); + kfree_rcu(hook, rcu); + } goto err4; + } err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) @@ -7378,13 +7413,8 @@ static void nf_tables_module_autoload(struct net *net) list_splice_init(&net->nft.module_list, &module_list); mutex_unlock(&net->nft.commit_mutex); list_for_each_entry_safe(req, next, &module_list, list) { - if (req->done) { - list_del(&req->list); - kfree(req); - } else { - request_module("%s", req->module); - req->done = true; - } + request_module("%s", req->module); + req->done = true; } mutex_lock(&net->nft.commit_mutex); list_splice(&module_list, &net->nft.module_list); @@ -8167,6 +8197,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) __nft_release_tables(net); mutex_unlock(&net->nft.commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); + WARN_ON_ONCE(!list_empty(&net->nft.module_list)); } static struct pernet_operations nf_tables_net_ops = { diff --git a/net/netfilter/nf_tables_set_core.c b/net/netfilter/nf_tables_set_core.c deleted file mode 100644 index 586b621007eb..000000000000 --- a/net/netfilter/nf_tables_set_core.c +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/module.h> -#include <net/netfilter/nf_tables_core.h> - -static int __init nf_tables_set_module_init(void) -{ - nft_register_set(&nft_set_hash_fast_type); - nft_register_set(&nft_set_hash_type); - nft_register_set(&nft_set_rhash_type); - nft_register_set(&nft_set_bitmap_type); - nft_register_set(&nft_set_rbtree_type); - nft_register_set(&nft_set_pipapo_type); - - return 0; -} - -static void __exit nf_tables_set_module_exit(void) -{ - nft_unregister_set(&nft_set_pipapo_type); - nft_unregister_set(&nft_set_rbtree_type); - nft_unregister_set(&nft_set_bitmap_type); - nft_unregister_set(&nft_set_rhash_type); - nft_unregister_set(&nft_set_hash_type); - nft_unregister_set(&nft_set_hash_fast_type); -} - -module_init(nf_tables_set_module_init); -module_exit(nf_tables_set_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 2481470dec36..5827117f2635 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -33,7 +33,7 @@ struct nf_acct { refcount_t refcnt; char name[NFACCT_NAME_MAX]; struct rcu_head rcu_head; - char data[0]; + char data[]; }; struct nfacct_filter { diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index de3a9596b7f1..a5f294aa8e4c 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -742,6 +742,8 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { [NFCTH_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN-1 }, [NFCTH_QUEUE_NUM] = { .type = NLA_U32, }, + [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, }, + [NFCTH_STATUS] = { .type = NLA_U32, }, }; static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = { diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 0ed2281f03be..bc37d6c59db4 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -93,7 +93,7 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { static int nft_bitwise_init_bool(struct nft_bitwise *priv, const struct nlattr *const tb[]) { - struct nft_data_desc d1, d2; + struct nft_data_desc mask, xor; int err; if (tb[NFTA_BITWISE_DATA]) @@ -103,29 +103,29 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv, !tb[NFTA_BITWISE_XOR]) return -EINVAL; - err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &d1, + err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &mask, tb[NFTA_BITWISE_MASK]); if (err < 0) return err; - if (d1.type != NFT_DATA_VALUE || d1.len != priv->len) { + if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) { err = -EINVAL; goto err1; } - err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2, + err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor, tb[NFTA_BITWISE_XOR]); if (err < 0) goto err1; - if (d2.type != NFT_DATA_VALUE || d2.len != priv->len) { + if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) { err = -EINVAL; goto err2; } return 0; err2: - nft_data_release(&priv->xor, d2.type); + nft_data_release(&priv->xor, xor.type); err1: - nft_data_release(&priv->mask, d1.type); + nft_data_release(&priv->mask, mask.type); return err; } diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c index ff9ac8ae0031..eac4a901233f 100644 --- a/net/netfilter/nft_chain_nat.c +++ b/net/netfilter/nft_chain_nat.c @@ -89,6 +89,7 @@ static const struct nft_chain_type nft_chain_nat_inet = { .name = "nat", .type = NFT_CHAIN_T_NAT, .family = NFPROTO_INET, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 683785225a3e..46ab28ec4b53 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -81,7 +81,6 @@ void nft_dynset_eval(const struct nft_expr *expr, const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set *set = priv->set; const struct nft_set_ext *ext; - const struct nft_expr *sexpr; u64 timeout; if (priv->op == NFT_DYNSET_OP_DELETE) { @@ -91,18 +90,13 @@ void nft_dynset_eval(const struct nft_expr *expr, if (set->ops->update(set, ®s->data[priv->sreg_key], nft_dynset_new, expr, regs, &ext)) { - sexpr = NULL; - if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) - sexpr = nft_set_ext_expr(ext); - if (priv->op == NFT_DYNSET_OP_UPDATE && nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { timeout = priv->timeout ? : set->timeout; *nft_set_ext_expiration(ext) = get_jiffies_64() + timeout; } - if (sexpr != NULL) - sexpr->ops->eval(sexpr, regs, pkt); + nft_set_elem_update_expr(ext, regs, pkt); if (priv->invert) regs->verdict.code = NFT_BREAK; @@ -206,21 +200,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_EVAL)) return -EINVAL; - priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]); + priv->expr = nft_set_elem_expr_alloc(ctx, set, + tb[NFTA_DYNSET_EXPR]); if (IS_ERR(priv->expr)) return PTR_ERR(priv->expr); - - err = -EOPNOTSUPP; - if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL)) - goto err1; - - if (priv->expr->ops->type->flags & NFT_EXPR_GC) { - if (set->flags & NFT_SET_TIMEOUT) - goto err1; - if (!set->ops->gc_init) - goto err1; - set->ops->gc_init(set); - } } nft_set_ext_prepare(&priv->tmpl); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 660bad688e2b..1e70359d633c 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -43,6 +43,7 @@ void nft_lookup_eval(const struct nft_expr *expr, nft_data_copy(®s->data[priv->dreg], nft_set_ext_data(ext), set->dlen); + nft_set_elem_update_expr(ext, regs, pkt); } static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 1993af3a2979..a7de3a58f553 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -129,6 +129,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 87e8d9ba0c9b..1cb2e67e6e03 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -293,8 +293,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, return true; } -struct nft_set_type nft_set_bitmap_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_bitmap_type = { .ops = { .privsize = nft_bitmap_privsize, .elemsize = offsetof(struct nft_bitmap_elem, ext), diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index d350a7cd3af0..4d3f147e8d8d 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -662,8 +662,7 @@ static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features return true; } -struct nft_set_type nft_set_rhash_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_rhash_type = { .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT | NFT_SET_EVAL, .ops = { @@ -686,8 +685,7 @@ struct nft_set_type nft_set_rhash_type __read_mostly = { }, }; -struct nft_set_type nft_set_hash_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_hash_type = { .features = NFT_SET_MAP | NFT_SET_OBJECT, .ops = { .privsize = nft_hash_privsize, @@ -706,8 +704,7 @@ struct nft_set_type nft_set_hash_type __read_mostly = { }, }; -struct nft_set_type nft_set_hash_fast_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_hash_fast_type = { .features = NFT_SET_MAP | NFT_SET_OBJECT, .ops = { .privsize = nft_hash_privsize, diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4fc0c924ed5d..c1afb6c94edc 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -330,144 +330,22 @@ #include <linux/kernel.h> #include <linux/init.h> -#include <linux/log2.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <uapi/linux/netfilter/nf_tables.h> -#include <net/ipv6.h> /* For the maximum length of a field */ #include <linux/bitmap.h> #include <linux/bitops.h> -/* Count of concatenated fields depends on count of 32-bit nftables registers */ -#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT - -/* Largest supported field size */ -#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr)) -#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE) - -/* Number of bits to be grouped together in lookup table buckets, arbitrary */ -#define NFT_PIPAPO_GROUP_BITS 4 -#define NFT_PIPAPO_GROUPS_PER_BYTE (BITS_PER_BYTE / NFT_PIPAPO_GROUP_BITS) - -/* Fields are padded to 32 bits in input registers */ -#define NFT_PIPAPO_GROUPS_PADDED_SIZE(x) \ - (round_up((x) / NFT_PIPAPO_GROUPS_PER_BYTE, sizeof(u32))) -#define NFT_PIPAPO_GROUPS_PADDING(x) \ - (NFT_PIPAPO_GROUPS_PADDED_SIZE((x)) - (x) / NFT_PIPAPO_GROUPS_PER_BYTE) - -/* Number of buckets, given by 2 ^ n, with n grouped bits */ -#define NFT_PIPAPO_BUCKETS (1 << NFT_PIPAPO_GROUP_BITS) - -/* Each n-bit range maps to up to n * 2 rules */ -#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2)) - -/* Use the rest of mapping table buckets for rule indices, but it makes no sense - * to exceed 32 bits - */ -#if BITS_PER_LONG == 64 -#define NFT_PIPAPO_MAP_TOBITS 32 -#else -#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS) -#endif - -/* ...which gives us the highest allowed index for a rule */ -#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \ - - (1UL << NFT_PIPAPO_MAP_NBITS)) - -#define nft_pipapo_for_each_field(field, index, match) \ - for ((field) = (match)->f, (index) = 0; \ - (index) < (match)->field_count; \ - (index)++, (field)++) - -/** - * union nft_pipapo_map_bucket - Bucket of mapping table - * @to: First rule number (in next field) this rule maps to - * @n: Number of rules (in next field) this rule maps to - * @e: If there's no next field, pointer to element this rule maps to - */ -union nft_pipapo_map_bucket { - struct { -#if BITS_PER_LONG == 64 - static_assert(NFT_PIPAPO_MAP_TOBITS <= 32); - u32 to; - - static_assert(NFT_PIPAPO_MAP_NBITS <= 32); - u32 n; -#else - unsigned long to:NFT_PIPAPO_MAP_TOBITS; - unsigned long n:NFT_PIPAPO_MAP_NBITS; -#endif - }; - struct nft_pipapo_elem *e; -}; - -/** - * struct nft_pipapo_field - Lookup, mapping tables and related data for a field - * @groups: Amount of 4-bit groups - * @rules: Number of inserted rules - * @bsize: Size of each bucket in lookup table, in longs - * @lt: Lookup table: 'groups' rows of NFT_PIPAPO_BUCKETS buckets - * @mt: Mapping table: one bucket per rule - */ -struct nft_pipapo_field { - int groups; - unsigned long rules; - size_t bsize; - unsigned long *lt; - union nft_pipapo_map_bucket *mt; -}; - -/** - * struct nft_pipapo_match - Data used for lookup and matching - * @field_count Amount of fields in set - * @scratch: Preallocated per-CPU maps for partial matching results - * @bsize_max: Maximum lookup table bucket size of all fields, in longs - * @rcu Matching data is swapped on commits - * @f: Fields, with lookup and mapping tables - */ -struct nft_pipapo_match { - int field_count; - unsigned long * __percpu *scratch; - size_t bsize_max; - struct rcu_head rcu; - struct nft_pipapo_field f[0]; -}; +#include "nft_set_pipapo_avx2.h" +#include "nft_set_pipapo.h" /* Current working bitmap index, toggled between field matches */ static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index); /** - * struct nft_pipapo - Representation of a set - * @match: Currently in-use matching data - * @clone: Copy where pending insertions and deletions are kept - * @groups: Total amount of 4-bit groups for fields in this set - * @width: Total bytes to be matched for one packet, including padding - * @dirty: Working copy has pending insertions or deletions - * @last_gc: Timestamp of last garbage collection run, jiffies - */ -struct nft_pipapo { - struct nft_pipapo_match __rcu *match; - struct nft_pipapo_match *clone; - int groups; - int width; - bool dirty; - unsigned long last_gc; -}; - -struct nft_pipapo_elem; - -/** - * struct nft_pipapo_elem - API-facing representation of single set element - * @ext: nftables API extensions - */ -struct nft_pipapo_elem { - struct nft_set_ext ext; -}; - -/** * pipapo_refill() - For each set bit, set bits from selected mapping table item * @map: Bitmap to be scanned for set bits * @len: Length of bitmap in longs @@ -484,9 +362,8 @@ struct nft_pipapo_elem { * * Return: -1 on no match, bit position on 'match_only', 0 otherwise. */ -static int pipapo_refill(unsigned long *map, int len, int rules, - unsigned long *dst, union nft_pipapo_map_bucket *mt, - bool match_only) +int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, + union nft_pipapo_map_bucket *mt, bool match_only) { unsigned long bitset; int k, ret = -1; @@ -559,26 +436,18 @@ static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1; - unsigned long *lt = f->lt; - int b, group; + int b; - /* For each 4-bit group: select lookup table bucket depending on + /* For each bit group: select lookup table bucket depending on * packet bytes value, then AND bucket value */ - for (group = 0; group < f->groups; group += 2) { - u8 v; - - v = *rp >> 4; - __bitmap_and(res_map, res_map, lt + v * f->bsize, - f->bsize * BITS_PER_LONG); - lt += f->bsize * NFT_PIPAPO_BUCKETS; - - v = *rp & 0x0f; - rp++; - __bitmap_and(res_map, res_map, lt + v * f->bsize, - f->bsize * BITS_PER_LONG); - lt += f->bsize * NFT_PIPAPO_BUCKETS; - } + if (likely(f->bb == 8)) + pipapo_and_field_buckets_8bit(f, res_map, rp); + else + pipapo_and_field_buckets_4bit(f, res_map, rp); + NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; + + rp += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f); /* Now populate the bitmap for the next field, unless this is * the last field, in which case return the matched 'ext' @@ -621,7 +490,7 @@ next_match: map_index = !map_index; swap(res_map, fill_map); - rp += NFT_PIPAPO_GROUPS_PADDING(f->groups); + rp += NFT_PIPAPO_GROUPS_PADDING(f); } out: @@ -669,26 +538,19 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1; - unsigned long *lt = f->lt; - int b, group; + int b; - /* For each 4-bit group: select lookup table bucket depending on + /* For each bit group: select lookup table bucket depending on * packet bytes value, then AND bucket value */ - for (group = 0; group < f->groups; group++) { - u8 v; - - if (group % 2) { - v = *data & 0x0f; - data++; - } else { - v = *data >> 4; - } - __bitmap_and(res_map, res_map, lt + v * f->bsize, - f->bsize * BITS_PER_LONG); + if (f->bb == 8) + pipapo_and_field_buckets_8bit(f, res_map, data); + else if (f->bb == 4) + pipapo_and_field_buckets_4bit(f, res_map, data); + else + BUG(); - lt += f->bsize * NFT_PIPAPO_BUCKETS; - } + data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f); /* Now populate the bitmap for the next field, unless this is * the last field, in which case return the matched 'ext' @@ -713,7 +575,7 @@ next_match: goto out; } - data += NFT_PIPAPO_GROUPS_PADDING(f->groups); + data += NFT_PIPAPO_GROUPS_PADDING(f); /* Swap bitmap indices: fill_map will be the initial bitmap for * the next field (i.e. the new res_map), and res_map is @@ -736,8 +598,8 @@ out: * @elem: nftables API element representation containing key data * @flags: Unused */ -void *nft_pipapo_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static void *nft_pipapo_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { return pipapo_get(net, set, (const u8 *)elem->key.val.data, nft_genmask_cur(net)); @@ -763,6 +625,10 @@ static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules) int group, bucket; new_bucket_size = DIV_ROUND_UP(rules, BITS_PER_LONG); +#ifdef NFT_PIPAPO_ALIGN + new_bucket_size = roundup(new_bucket_size, + NFT_PIPAPO_ALIGN / sizeof(*new_lt)); +#endif if (new_bucket_size == f->bsize) goto mt; @@ -772,15 +638,18 @@ static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules) else copy = new_bucket_size; - new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS * new_bucket_size * - sizeof(*new_lt), GFP_KERNEL); + new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS(f->bb) * + new_bucket_size * sizeof(*new_lt) + + NFT_PIPAPO_ALIGN_HEADROOM, + GFP_KERNEL); if (!new_lt) return -ENOMEM; - new_p = new_lt; - old_p = old_lt; + new_p = NFT_PIPAPO_LT_ALIGN(new_lt); + old_p = NFT_PIPAPO_LT_ALIGN(old_lt); + for (group = 0; group < f->groups; group++) { - for (bucket = 0; bucket < NFT_PIPAPO_BUCKETS; bucket++) { + for (bucket = 0; bucket < NFT_PIPAPO_BUCKETS(f->bb); bucket++) { memcpy(new_p, old_p, copy * sizeof(*new_p)); new_p += copy; old_p += copy; @@ -807,7 +676,7 @@ mt: if (new_lt) { f->bsize = new_bucket_size; - f->lt = new_lt; + NFT_PIPAPO_LT_ASSIGN(f, new_lt); kvfree(old_lt); } @@ -829,13 +698,196 @@ static void pipapo_bucket_set(struct nft_pipapo_field *f, int rule, int group, { unsigned long *pos; - pos = f->lt + f->bsize * NFT_PIPAPO_BUCKETS * group; + pos = NFT_PIPAPO_LT_ALIGN(f->lt); + pos += f->bsize * NFT_PIPAPO_BUCKETS(f->bb) * group; pos += f->bsize * v; __set_bit(rule, pos); } /** + * pipapo_lt_4b_to_8b() - Switch lookup table group width from 4 bits to 8 bits + * @old_groups: Number of current groups + * @bsize: Size of one bucket, in longs + * @old_lt: Pointer to the current lookup table + * @new_lt: Pointer to the new, pre-allocated lookup table + * + * Each bucket with index b in the new lookup table, belonging to group g, is + * filled with the bit intersection between: + * - bucket with index given by the upper 4 bits of b, from group g, and + * - bucket with index given by the lower 4 bits of b, from group g + 1 + * + * That is, given buckets from the new lookup table N(x, y) and the old lookup + * table O(x, y), with x bucket index, and y group index: + * + * N(b, g) := O(b / 16, g) & O(b % 16, g + 1) + * + * This ensures equivalence of the matching results on lookup. Two examples in + * pictures: + * + * bucket + * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 ... 254 255 + * 0 ^ + * 1 | ^ + * ... ( & ) | + * / \ | + * / \ .-( & )-. + * / bucket \ | | + * group 0 / 1 2 3 \ 4 5 6 7 8 9 10 11 12 13 |14 15 | + * 0 / \ | | + * 1 \ | | + * 2 | --' + * 3 '- + * ... + */ +static void pipapo_lt_4b_to_8b(int old_groups, int bsize, + unsigned long *old_lt, unsigned long *new_lt) +{ + int g, b, i; + + for (g = 0; g < old_groups / 2; g++) { + int src_g0 = g * 2, src_g1 = g * 2 + 1; + + for (b = 0; b < NFT_PIPAPO_BUCKETS(8); b++) { + int src_b0 = b / NFT_PIPAPO_BUCKETS(4); + int src_b1 = b % NFT_PIPAPO_BUCKETS(4); + int src_i0 = src_g0 * NFT_PIPAPO_BUCKETS(4) + src_b0; + int src_i1 = src_g1 * NFT_PIPAPO_BUCKETS(4) + src_b1; + + for (i = 0; i < bsize; i++) { + *new_lt = old_lt[src_i0 * bsize + i] & + old_lt[src_i1 * bsize + i]; + new_lt++; + } + } + } +} + +/** + * pipapo_lt_8b_to_4b() - Switch lookup table group width from 8 bits to 4 bits + * @old_groups: Number of current groups + * @bsize: Size of one bucket, in longs + * @old_lt: Pointer to the current lookup table + * @new_lt: Pointer to the new, pre-allocated lookup table + * + * Each bucket with index b in the new lookup table, belonging to group g, is + * filled with the bit union of: + * - all the buckets with index such that the upper four bits of the lower byte + * equal b, from group g, with g odd + * - all the buckets with index such that the lower four bits equal b, from + * group g, with g even + * + * That is, given buckets from the new lookup table N(x, y) and the old lookup + * table O(x, y), with x bucket index, and y group index: + * + * - with g odd: N(b, g) := U(O(x, g) for each x : x = (b & 0xf0) >> 4) + * - with g even: N(b, g) := U(O(x, g) for each x : x = b & 0x0f) + * + * where U() denotes the arbitrary union operation (binary OR of n terms). This + * ensures equivalence of the matching results on lookup. + */ +static void pipapo_lt_8b_to_4b(int old_groups, int bsize, + unsigned long *old_lt, unsigned long *new_lt) +{ + int g, b, bsrc, i; + + memset(new_lt, 0, old_groups * 2 * NFT_PIPAPO_BUCKETS(4) * bsize * + sizeof(unsigned long)); + + for (g = 0; g < old_groups * 2; g += 2) { + int src_g = g / 2; + + for (b = 0; b < NFT_PIPAPO_BUCKETS(4); b++) { + for (bsrc = NFT_PIPAPO_BUCKETS(8) * src_g; + bsrc < NFT_PIPAPO_BUCKETS(8) * (src_g + 1); + bsrc++) { + if (((bsrc & 0xf0) >> 4) != b) + continue; + + for (i = 0; i < bsize; i++) + new_lt[i] |= old_lt[bsrc * bsize + i]; + } + + new_lt += bsize; + } + + for (b = 0; b < NFT_PIPAPO_BUCKETS(4); b++) { + for (bsrc = NFT_PIPAPO_BUCKETS(8) * src_g; + bsrc < NFT_PIPAPO_BUCKETS(8) * (src_g + 1); + bsrc++) { + if ((bsrc & 0x0f) != b) + continue; + + for (i = 0; i < bsize; i++) + new_lt[i] |= old_lt[bsrc * bsize + i]; + } + + new_lt += bsize; + } + } +} + +/** + * pipapo_lt_bits_adjust() - Adjust group size for lookup table if needed + * @f: Field containing lookup table + */ +static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) +{ + unsigned long *new_lt; + int groups, bb; + size_t lt_size; + + lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize * + sizeof(*f->lt); + + if (f->bb == NFT_PIPAPO_GROUP_BITS_SMALL_SET && + lt_size > NFT_PIPAPO_LT_SIZE_HIGH) { + groups = f->groups * 2; + bb = NFT_PIPAPO_GROUP_BITS_LARGE_SET; + + lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * + sizeof(*f->lt); + } else if (f->bb == NFT_PIPAPO_GROUP_BITS_LARGE_SET && + lt_size < NFT_PIPAPO_LT_SIZE_LOW) { + groups = f->groups / 2; + bb = NFT_PIPAPO_GROUP_BITS_SMALL_SET; + + lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * + sizeof(*f->lt); + + /* Don't increase group width if the resulting lookup table size + * would exceed the upper size threshold for a "small" set. + */ + if (lt_size > NFT_PIPAPO_LT_SIZE_HIGH) + return; + } else { + return; + } + + new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL); + if (!new_lt) + return; + + NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; + if (f->bb == 4 && bb == 8) { + pipapo_lt_4b_to_8b(f->groups, f->bsize, + NFT_PIPAPO_LT_ALIGN(f->lt), + NFT_PIPAPO_LT_ALIGN(new_lt)); + } else if (f->bb == 8 && bb == 4) { + pipapo_lt_8b_to_4b(f->groups, f->bsize, + NFT_PIPAPO_LT_ALIGN(f->lt), + NFT_PIPAPO_LT_ALIGN(new_lt)); + } else { + BUG(); + } + + f->groups = groups; + f->bb = bb; + kvfree(f->lt); + NFT_PIPAPO_LT_ASSIGN(f, new_lt); +} + +/** * pipapo_insert() - Insert new rule in field given input key and mask length * @f: Field containing lookup table * @k: Input key for classification, without nftables padding @@ -849,7 +901,7 @@ static void pipapo_bucket_set(struct nft_pipapo_field *f, int rule, int group, static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, int mask_bits) { - int rule = f->rules++, group, ret; + int rule = f->rules++, group, ret, bit_offset = 0; ret = pipapo_resize(f, f->rules - 1, f->rules); if (ret) @@ -859,28 +911,33 @@ static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, int i, v; u8 mask; - if (group % 2) - v = k[group / 2] & 0x0f; - else - v = k[group / 2] >> 4; + v = k[group / (BITS_PER_BYTE / f->bb)]; + v &= GENMASK(BITS_PER_BYTE - bit_offset - 1, 0); + v >>= (BITS_PER_BYTE - bit_offset) - f->bb; + + bit_offset += f->bb; + bit_offset %= BITS_PER_BYTE; - if (mask_bits >= (group + 1) * 4) { + if (mask_bits >= (group + 1) * f->bb) { /* Not masked */ pipapo_bucket_set(f, rule, group, v); - } else if (mask_bits <= group * 4) { + } else if (mask_bits <= group * f->bb) { /* Completely masked */ - for (i = 0; i < NFT_PIPAPO_BUCKETS; i++) + for (i = 0; i < NFT_PIPAPO_BUCKETS(f->bb); i++) pipapo_bucket_set(f, rule, group, i); } else { /* The mask limit falls on this group */ - mask = 0x0f >> (mask_bits - group * 4); - for (i = 0; i < NFT_PIPAPO_BUCKETS; i++) { + mask = GENMASK(f->bb - 1, 0); + mask >>= mask_bits - group * f->bb; + for (i = 0; i < NFT_PIPAPO_BUCKETS(f->bb); i++) { if ((i & ~mask) == (v & ~mask)) pipapo_bucket_set(f, rule, group, i); } } } + pipapo_lt_bits_adjust(f); + return 1; } @@ -1053,8 +1110,12 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, for_each_possible_cpu(i) { unsigned long *scratch; +#ifdef NFT_PIPAPO_ALIGN + unsigned long *scratch_aligned; +#endif - scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2, + scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 + + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL, cpu_to_node(i)); if (!scratch) { /* On failure, there's no need to undo previous @@ -1070,6 +1131,11 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, kfree(*per_cpu_ptr(clone->scratch, i)); *per_cpu_ptr(clone->scratch, i) = scratch; + +#ifdef NFT_PIPAPO_ALIGN + scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch); + *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; +#endif } return 0; @@ -1123,11 +1189,11 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, return -ENOSPC; if (memcmp(start_p, end_p, - f->groups / NFT_PIPAPO_GROUPS_PER_BYTE) > 0) + f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)) > 0) return -EINVAL; - start_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); - end_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); + start_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + end_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); } /* Insert */ @@ -1141,22 +1207,19 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, rulemap[i].to = f->rules; ret = memcmp(start, end, - f->groups / NFT_PIPAPO_GROUPS_PER_BYTE); - if (!ret) { - ret = pipapo_insert(f, start, - f->groups * NFT_PIPAPO_GROUP_BITS); - } else { - ret = pipapo_expand(f, start, end, - f->groups * NFT_PIPAPO_GROUP_BITS); - } + f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)); + if (!ret) + ret = pipapo_insert(f, start, f->groups * f->bb); + else + ret = pipapo_expand(f, start, end, f->groups * f->bb); if (f->bsize > bsize_max) bsize_max = f->bsize; rulemap[i].n = ret; - start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); - end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); + start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); } if (!*this_cpu_ptr(m->scratch) || bsize_max > m->bsize_max) { @@ -1200,23 +1263,35 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch) goto out_scratch; +#ifdef NFT_PIPAPO_ALIGN + new->scratch_aligned = alloc_percpu(*new->scratch_aligned); + if (!new->scratch_aligned) + goto out_scratch; +#endif + rcu_head_init(&new->rcu); src = old->f; dst = new->f; for (i = 0; i < old->field_count; i++) { + unsigned long *new_lt; + memcpy(dst, src, offsetof(struct nft_pipapo_field, lt)); - dst->lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS * - src->bsize * sizeof(*dst->lt), - GFP_KERNEL); - if (!dst->lt) + new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) * + src->bsize * sizeof(*dst->lt) + + NFT_PIPAPO_ALIGN_HEADROOM, + GFP_KERNEL); + if (!new_lt) goto out_lt; - memcpy(dst->lt, src->lt, + NFT_PIPAPO_LT_ASSIGN(dst, new_lt); + + memcpy(NFT_PIPAPO_LT_ALIGN(new_lt), + NFT_PIPAPO_LT_ALIGN(src->lt), src->bsize * sizeof(*dst->lt) * - src->groups * NFT_PIPAPO_BUCKETS); + src->groups * NFT_PIPAPO_BUCKETS(src->bb)); dst->mt = kvmalloc(src->rules * sizeof(*src->mt), GFP_KERNEL); if (!dst->mt) @@ -1237,8 +1312,11 @@ out_lt: kvfree(dst->lt); dst--; } - free_percpu(new->scratch); +#ifdef NFT_PIPAPO_ALIGN + free_percpu(new->scratch_aligned); +#endif out_scratch: + free_percpu(new->scratch); kfree(new); return ERR_PTR(-ENOMEM); @@ -1394,9 +1472,10 @@ static void pipapo_drop(struct nft_pipapo_match *m, unsigned long *pos; int b; - pos = f->lt + g * NFT_PIPAPO_BUCKETS * f->bsize; + pos = NFT_PIPAPO_LT_ALIGN(f->lt) + g * + NFT_PIPAPO_BUCKETS(f->bb) * f->bsize; - for (b = 0; b < NFT_PIPAPO_BUCKETS; b++) { + for (b = 0; b < NFT_PIPAPO_BUCKETS(f->bb); b++) { bitmap_cut(pos, pos, rulemap[i].to, rulemap[i].n, f->bsize * BITS_PER_LONG); @@ -1414,6 +1493,8 @@ static void pipapo_drop(struct nft_pipapo_match *m, ; } f->rules -= rulemap[i].n; + + pipapo_lt_bits_adjust(f); } } @@ -1498,6 +1579,9 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) for_each_possible_cpu(i) kfree(*per_cpu_ptr(m->scratch, i)); +#ifdef NFT_PIPAPO_ALIGN + free_percpu(m->scratch_aligned); +#endif free_percpu(m->scratch); pipapo_free_fields(m); @@ -1690,30 +1774,33 @@ static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set, static int pipapo_get_boundaries(struct nft_pipapo_field *f, int first_rule, int rule_count, u8 *left, u8 *right) { + int g, mask_len = 0, bit_offset = 0; u8 *l = left, *r = right; - int g, mask_len = 0; for (g = 0; g < f->groups; g++) { int b, x0, x1; x0 = -1; x1 = -1; - for (b = 0; b < NFT_PIPAPO_BUCKETS; b++) { + for (b = 0; b < NFT_PIPAPO_BUCKETS(f->bb); b++) { unsigned long *pos; - pos = f->lt + (g * NFT_PIPAPO_BUCKETS + b) * f->bsize; + pos = NFT_PIPAPO_LT_ALIGN(f->lt) + + (g * NFT_PIPAPO_BUCKETS(f->bb) + b) * f->bsize; if (test_bit(first_rule, pos) && x0 == -1) x0 = b; if (test_bit(first_rule + rule_count - 1, pos)) x1 = b; } - if (g % 2) { - *(l++) |= x0 & 0x0f; - *(r++) |= x1 & 0x0f; - } else { - *l |= x0 << 4; - *r |= x1 << 4; + *l |= x0 << (BITS_PER_BYTE - f->bb - bit_offset); + *r |= x1 << (BITS_PER_BYTE - f->bb - bit_offset); + + bit_offset += f->bb; + if (bit_offset >= BITS_PER_BYTE) { + bit_offset %= BITS_PER_BYTE; + l++; + r++; } if (x1 - x0 == 0) @@ -1748,8 +1835,9 @@ static bool pipapo_match_field(struct nft_pipapo_field *f, pipapo_get_boundaries(f, first_rule, rule_count, left, right); - return !memcmp(start, left, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE) && - !memcmp(end, right, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE); + return !memcmp(start, left, + f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)) && + !memcmp(end, right, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)); } /** @@ -1801,8 +1889,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, rules_fx = f->mt[start].n; start = f->mt[start].to; - match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); - match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); + match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); } if (i == m->field_count) { @@ -1885,56 +1973,24 @@ static u64 nft_pipapo_privsize(const struct nlattr * const nla[], } /** - * nft_pipapo_estimate() - Estimate set size, space and lookup complexity - * @desc: Set description, element count and field description used here + * nft_pipapo_estimate() - Set size, space and lookup complexity + * @desc: Set description, element count and field description used * @features: Flags: NFT_SET_INTERVAL needs to be there * @est: Storage for estimation data * - * The size for this set type can vary dramatically, as it depends on the number - * of rules (composing netmasks) the entries expand to. We compute the worst - * case here. - * - * In general, for a non-ranged entry or a single composing netmask, we need - * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that - * is, each input bit needs four bits of matching data), plus a bucket in the - * mapping table for each field. - * - * Return: true only for compatible range concatenations + * Return: true if set description is compatible, false otherwise */ static bool nft_pipapo_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est) { - unsigned long entry_size; - int i; - - if (!(features & NFT_SET_INTERVAL) || desc->field_count <= 1) + if (!(features & NFT_SET_INTERVAL) || + desc->field_count < NFT_PIPAPO_MIN_FIELDS) return false; - for (i = 0, entry_size = 0; i < desc->field_count; i++) { - unsigned long rules; - - if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES) - return false; - - /* Worst-case ranges for each concatenated field: each n-bit - * field can expand to up to n * 2 rules in each bucket, and - * each rule also needs a mapping bucket. - */ - rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2; - entry_size += rules * NFT_PIPAPO_BUCKETS / BITS_PER_BYTE; - entry_size += rules * sizeof(union nft_pipapo_map_bucket); - } - - /* Rules in lookup and mapping tables are needed for each entry */ - est->size = desc->size * entry_size; - if (est->size && div_u64(est->size, desc->size) != entry_size) + est->size = pipapo_estimate_size(desc); + if (!est->size) return false; - est->size += sizeof(struct nft_pipapo) + - sizeof(struct nft_pipapo_match) * 2; - - est->size += sizeof(struct nft_pipapo_field) * desc->field_count; - est->lookup = NFT_SET_CLASS_O_LOG_N; est->space = NFT_SET_CLASS_O_N; @@ -1961,38 +2017,52 @@ static int nft_pipapo_init(const struct nft_set *set, struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; struct nft_pipapo_field *f; - int err, i; + int err, i, field_count; + + field_count = desc->field_count ? : 1; - if (desc->field_count > NFT_PIPAPO_MAX_FIELDS) + if (field_count > NFT_PIPAPO_MAX_FIELDS) return -EINVAL; - m = kmalloc(sizeof(*priv->match) + sizeof(*f) * desc->field_count, + m = kmalloc(sizeof(*priv->match) + sizeof(*f) * field_count, GFP_KERNEL); if (!m) return -ENOMEM; - m->field_count = desc->field_count; + m->field_count = field_count; m->bsize_max = 0; m->scratch = alloc_percpu(unsigned long *); if (!m->scratch) { err = -ENOMEM; - goto out_free; + goto out_scratch; } for_each_possible_cpu(i) *per_cpu_ptr(m->scratch, i) = NULL; +#ifdef NFT_PIPAPO_ALIGN + m->scratch_aligned = alloc_percpu(unsigned long *); + if (!m->scratch_aligned) { + err = -ENOMEM; + goto out_free; + } + for_each_possible_cpu(i) + *per_cpu_ptr(m->scratch_aligned, i) = NULL; +#endif + rcu_head_init(&m->rcu); nft_pipapo_for_each_field(f, i, m) { - f->groups = desc->field_len[i] * NFT_PIPAPO_GROUPS_PER_BYTE; - priv->groups += f->groups; + int len = desc->field_len[i] ? : set->klen; - priv->width += round_up(desc->field_len[i], sizeof(u32)); + f->bb = NFT_PIPAPO_GROUP_BITS_INIT; + f->groups = len * NFT_PIPAPO_GROUPS_PER_BYTE(f); + + priv->width += round_up(len, sizeof(u32)); f->bsize = 0; f->rules = 0; - f->lt = NULL; + NFT_PIPAPO_LT_ASSIGN(f, NULL); f->mt = NULL; } @@ -2010,7 +2080,11 @@ static int nft_pipapo_init(const struct nft_set *set, return 0; out_free: +#ifdef NFT_PIPAPO_ALIGN + free_percpu(m->scratch_aligned); +#endif free_percpu(m->scratch); +out_scratch: kfree(m); return err; @@ -2045,16 +2119,21 @@ static void nft_pipapo_destroy(const struct nft_set *set) nft_set_elem_destroy(set, e, true); } +#ifdef NFT_PIPAPO_ALIGN + free_percpu(m->scratch_aligned); +#endif for_each_possible_cpu(cpu) kfree(*per_cpu_ptr(m->scratch, cpu)); free_percpu(m->scratch); - pipapo_free_fields(m); kfree(m); priv->match = NULL; } if (priv->clone) { +#ifdef NFT_PIPAPO_ALIGN + free_percpu(priv->clone->scratch_aligned); +#endif for_each_possible_cpu(cpu) kfree(*per_cpu_ptr(priv->clone->scratch, cpu)); free_percpu(priv->clone->scratch); @@ -2081,8 +2160,7 @@ static void nft_pipapo_gc_init(const struct nft_set *set) priv->last_gc = jiffies; } -struct nft_set_type nft_set_pipapo_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_pipapo_type = { .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .ops = { @@ -2102,3 +2180,26 @@ struct nft_set_type nft_set_pipapo_type __read_mostly = { .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; + +#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2) +const struct nft_set_type nft_set_pipapo_avx2_type = { + .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | + NFT_SET_TIMEOUT, + .ops = { + .lookup = nft_pipapo_avx2_lookup, + .insert = nft_pipapo_insert, + .activate = nft_pipapo_activate, + .deactivate = nft_pipapo_deactivate, + .flush = nft_pipapo_flush, + .remove = nft_pipapo_remove, + .walk = nft_pipapo_walk, + .get = nft_pipapo_get, + .privsize = nft_pipapo_privsize, + .estimate = nft_pipapo_avx2_estimate, + .init = nft_pipapo_init, + .destroy = nft_pipapo_destroy, + .gc_init = nft_pipapo_gc_init, + .elemsize = offsetof(struct nft_pipapo_elem, ext), + }, +}; +#endif diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h new file mode 100644 index 000000000000..25a75591583e --- /dev/null +++ b/net/netfilter/nft_set_pipapo.h @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#ifndef _NFT_SET_PIPAPO_H + +#include <linux/log2.h> +#include <net/ipv6.h> /* For the maximum length of a field */ + +/* Count of concatenated fields depends on count of 32-bit nftables registers */ +#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT + +/* Restrict usage to multiple fields, make sure rbtree is used otherwise */ +#define NFT_PIPAPO_MIN_FIELDS 2 + +/* Largest supported field size */ +#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr)) +#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE) + +/* Bits to be grouped together in table buckets depending on set size */ +#define NFT_PIPAPO_GROUP_BITS_INIT NFT_PIPAPO_GROUP_BITS_SMALL_SET +#define NFT_PIPAPO_GROUP_BITS_SMALL_SET 8 +#define NFT_PIPAPO_GROUP_BITS_LARGE_SET 4 +#define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4 \ + BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) || \ + (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4)) +#define NFT_PIPAPO_GROUPS_PER_BYTE(f) (BITS_PER_BYTE / (f)->bb) + +/* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the + * small group width, and switch to the big group width if the table gets + * smaller than NFT_PIPAPO_LT_SIZE_LOW. + * + * Picking 2MiB as threshold (for a single table) avoids as much as possible + * crossing page boundaries on most architectures (x86-64 and MIPS huge pages, + * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which + * keeps performance nice in case kvmalloc() gives us non-contiguous areas. + */ +#define NFT_PIPAPO_LT_SIZE_THRESHOLD (1 << 21) +#define NFT_PIPAPO_LT_SIZE_HYSTERESIS (1 << 16) +#define NFT_PIPAPO_LT_SIZE_HIGH NFT_PIPAPO_LT_SIZE_THRESHOLD +#define NFT_PIPAPO_LT_SIZE_LOW NFT_PIPAPO_LT_SIZE_THRESHOLD - \ + NFT_PIPAPO_LT_SIZE_HYSTERESIS + +/* Fields are padded to 32 bits in input registers */ +#define NFT_PIPAPO_GROUPS_PADDED_SIZE(f) \ + (round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32))) +#define NFT_PIPAPO_GROUPS_PADDING(f) \ + (NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups / \ + NFT_PIPAPO_GROUPS_PER_BYTE(f)) + +/* Number of buckets given by 2 ^ n, with n bucket bits */ +#define NFT_PIPAPO_BUCKETS(bb) (1 << (bb)) + +/* Each n-bit range maps to up to n * 2 rules */ +#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2)) + +/* Use the rest of mapping table buckets for rule indices, but it makes no sense + * to exceed 32 bits + */ +#if BITS_PER_LONG == 64 +#define NFT_PIPAPO_MAP_TOBITS 32 +#else +#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS) +#endif + +/* ...which gives us the highest allowed index for a rule */ +#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \ + - (1UL << NFT_PIPAPO_MAP_NBITS)) + +/* Definitions for vectorised implementations */ +#ifdef NFT_PIPAPO_ALIGN +#define NFT_PIPAPO_ALIGN_HEADROOM \ + (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN) +#define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN)) +#define NFT_PIPAPO_LT_ASSIGN(field, x) \ + do { \ + (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \ + (field)->lt = (x); \ + } while (0) +#else +#define NFT_PIPAPO_ALIGN_HEADROOM 0 +#define NFT_PIPAPO_LT_ALIGN(lt) (lt) +#define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x)) +#endif /* NFT_PIPAPO_ALIGN */ + +#define nft_pipapo_for_each_field(field, index, match) \ + for ((field) = (match)->f, (index) = 0; \ + (index) < (match)->field_count; \ + (index)++, (field)++) + +/** + * union nft_pipapo_map_bucket - Bucket of mapping table + * @to: First rule number (in next field) this rule maps to + * @n: Number of rules (in next field) this rule maps to + * @e: If there's no next field, pointer to element this rule maps to + */ +union nft_pipapo_map_bucket { + struct { +#if BITS_PER_LONG == 64 + static_assert(NFT_PIPAPO_MAP_TOBITS <= 32); + u32 to; + + static_assert(NFT_PIPAPO_MAP_NBITS <= 32); + u32 n; +#else + unsigned long to:NFT_PIPAPO_MAP_TOBITS; + unsigned long n:NFT_PIPAPO_MAP_NBITS; +#endif + }; + struct nft_pipapo_elem *e; +}; + +/** + * struct nft_pipapo_field - Lookup, mapping tables and related data for a field + * @groups: Amount of bit groups + * @rules: Number of inserted rules + * @bsize: Size of each bucket in lookup table, in longs + * @bb: Number of bits grouped together in lookup table buckets + * @lt: Lookup table: 'groups' rows of buckets + * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes + * @mt: Mapping table: one bucket per rule + */ +struct nft_pipapo_field { + int groups; + unsigned long rules; + size_t bsize; + int bb; +#ifdef NFT_PIPAPO_ALIGN + unsigned long *lt_aligned; +#endif + unsigned long *lt; + union nft_pipapo_map_bucket *mt; +}; + +/** + * struct nft_pipapo_match - Data used for lookup and matching + * @field_count Amount of fields in set + * @scratch: Preallocated per-CPU maps for partial matching results + * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes + * @bsize_max: Maximum lookup table bucket size of all fields, in longs + * @rcu Matching data is swapped on commits + * @f: Fields, with lookup and mapping tables + */ +struct nft_pipapo_match { + int field_count; +#ifdef NFT_PIPAPO_ALIGN + unsigned long * __percpu *scratch_aligned; +#endif + unsigned long * __percpu *scratch; + size_t bsize_max; + struct rcu_head rcu; + struct nft_pipapo_field f[]; +}; + +/** + * struct nft_pipapo - Representation of a set + * @match: Currently in-use matching data + * @clone: Copy where pending insertions and deletions are kept + * @width: Total bytes to be matched for one packet, including padding + * @dirty: Working copy has pending insertions or deletions + * @last_gc: Timestamp of last garbage collection run, jiffies + */ +struct nft_pipapo { + struct nft_pipapo_match __rcu *match; + struct nft_pipapo_match *clone; + int width; + bool dirty; + unsigned long last_gc; +}; + +struct nft_pipapo_elem; + +/** + * struct nft_pipapo_elem - API-facing representation of single set element + * @ext: nftables API extensions + */ +struct nft_pipapo_elem { + struct nft_set_ext ext; +}; + +int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, + union nft_pipapo_map_bucket *mt, bool match_only); + +/** + * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets + * @f: Field including lookup table + * @dst: Area to store result + * @data: Input data selecting table buckets + */ +static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f, + unsigned long *dst, + const u8 *data) +{ + unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt); + int group; + + for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) { + u8 v; + + v = *data >> 4; + __bitmap_and(dst, dst, lt + v * f->bsize, + f->bsize * BITS_PER_LONG); + lt += f->bsize * NFT_PIPAPO_BUCKETS(4); + + v = *data & 0x0f; + __bitmap_and(dst, dst, lt + v * f->bsize, + f->bsize * BITS_PER_LONG); + lt += f->bsize * NFT_PIPAPO_BUCKETS(4); + } +} + +/** + * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets + * @f: Field including lookup table + * @dst: Area to store result + * @data: Input data selecting table buckets + */ +static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f, + unsigned long *dst, + const u8 *data) +{ + unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt); + int group; + + for (group = 0; group < f->groups; group++, data++) { + __bitmap_and(dst, dst, lt + *data * f->bsize, + f->bsize * BITS_PER_LONG); + lt += f->bsize * NFT_PIPAPO_BUCKETS(8); + } +} + +/** + * pipapo_estimate_size() - Estimate worst-case for set size + * @desc: Set description, element count and field description used here + * + * The size for this set type can vary dramatically, as it depends on the number + * of rules (composing netmasks) the entries expand to. We compute the worst + * case here. + * + * In general, for a non-ranged entry or a single composing netmask, we need + * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that + * is, each input bit needs four bits of matching data), plus a bucket in the + * mapping table for each field. + * + * Return: worst-case set size in bytes, 0 on any overflow + */ +static u64 pipapo_estimate_size(const struct nft_set_desc *desc) +{ + unsigned long entry_size; + u64 size; + int i; + + for (i = 0, entry_size = 0; i < desc->field_count; i++) { + unsigned long rules; + + if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES) + return 0; + + /* Worst-case ranges for each concatenated field: each n-bit + * field can expand to up to n * 2 rules in each bucket, and + * each rule also needs a mapping bucket. + */ + rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2; + entry_size += rules * + NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) / + BITS_PER_BYTE; + entry_size += rules * sizeof(union nft_pipapo_map_bucket); + } + + /* Rules in lookup and mapping tables are needed for each entry */ + size = desc->size * entry_size; + if (size && div_u64(size, desc->size) != entry_size) + return 0; + + size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2; + + size += sizeof(struct nft_pipapo_field) * desc->field_count; + + return size; +} + +#endif /* _NFT_SET_PIPAPO_H */ diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c new file mode 100644 index 000000000000..d65ae0e23028 --- /dev/null +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -0,0 +1,1223 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* PIPAPO: PIle PAcket POlicies: AVX2 packet lookup routines + * + * Copyright (c) 2019-2020 Red Hat GmbH + * + * Author: Stefano Brivio <sbrivio@redhat.com> + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <uapi/linux/netfilter/nf_tables.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include <linux/compiler.h> +#include <asm/fpu/api.h> + +#include "nft_set_pipapo_avx2.h" +#include "nft_set_pipapo.h" + +#define NFT_PIPAPO_LONGS_PER_M256 (XSAVE_YMM_SIZE / BITS_PER_LONG) + +/* Load from memory into YMM register with non-temporal hint ("stream load"), + * that is, don't fetch lines from memory into the cache. This avoids pushing + * precious packet data out of the cache hierarchy, and is appropriate when: + * + * - loading buckets from lookup tables, as they are not going to be used + * again before packets are entirely classified + * + * - loading the result bitmap from the previous field, as it's never used + * again + */ +#define NFT_PIPAPO_AVX2_LOAD(reg, loc) \ + asm volatile("vmovntdqa %0, %%ymm" #reg : : "m" (loc)) + +/* Stream a single lookup table bucket into YMM register given lookup table, + * group index, value of packet bits, bucket size. + */ +#define NFT_PIPAPO_AVX2_BUCKET_LOAD4(reg, lt, group, v, bsize) \ + NFT_PIPAPO_AVX2_LOAD(reg, \ + lt[((group) * NFT_PIPAPO_BUCKETS(4) + \ + (v)) * (bsize)]) +#define NFT_PIPAPO_AVX2_BUCKET_LOAD8(reg, lt, group, v, bsize) \ + NFT_PIPAPO_AVX2_LOAD(reg, \ + lt[((group) * NFT_PIPAPO_BUCKETS(8) + \ + (v)) * (bsize)]) + +/* Bitwise AND: the staple operation of this algorithm */ +#define NFT_PIPAPO_AVX2_AND(dst, a, b) \ + asm volatile("vpand %ymm" #a ", %ymm" #b ", %ymm" #dst) + +/* Jump to label if @reg is zero */ +#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \ + asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ + "je %l[" #label "]" : : : : label) + +/* Store 256 bits from YMM register into memory. Contrary to bucket load + * operation, we don't bypass the cache here, as stored matching results + * are always used shortly after. + */ +#define NFT_PIPAPO_AVX2_STORE(loc, reg) \ + asm volatile("vmovdqa %%ymm" #reg ", %0" : "=m" (loc)) + +/* Zero out a complete YMM register, @reg */ +#define NFT_PIPAPO_AVX2_ZERO(reg) \ + asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg) + +/* Current working bitmap index, toggled between field matches */ +static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index); + +/** + * nft_pipapo_avx2_prepare() - Prepare before main algorithm body + * + * This zeroes out ymm15, which is later used whenever we need to clear a + * memory location, by storing its content into memory. + */ +static void nft_pipapo_avx2_prepare(void) +{ + NFT_PIPAPO_AVX2_ZERO(15); +} + +/** + * nft_pipapo_avx2_fill() - Fill a bitmap region with ones + * @data: Base memory area + * @start: First bit to set + * @len: Count of bits to fill + * + * This is nothing else than a version of bitmap_set(), as used e.g. by + * pipapo_refill(), tailored for the microarchitectures using it and better + * suited for the specific usage: it's very likely that we'll set a small number + * of bits, not crossing a word boundary, and correct branch prediction is + * critical here. + * + * This function doesn't actually use any AVX2 instruction. + */ +static void nft_pipapo_avx2_fill(unsigned long *data, int start, int len) +{ + int offset = start % BITS_PER_LONG; + unsigned long mask; + + data += start / BITS_PER_LONG; + + if (likely(len == 1)) { + *data |= BIT(offset); + return; + } + + if (likely(len < BITS_PER_LONG || offset)) { + if (likely(len + offset <= BITS_PER_LONG)) { + *data |= GENMASK(len - 1 + offset, offset); + return; + } + + *data |= ~0UL << offset; + len -= BITS_PER_LONG - offset; + data++; + + if (len <= BITS_PER_LONG) { + mask = ~0UL >> (BITS_PER_LONG - len); + *data |= mask; + return; + } + } + + memset(data, 0xff, len / BITS_PER_BYTE); + data += len / BITS_PER_LONG; + + len %= BITS_PER_LONG; + if (len) + *data |= ~0UL >> (BITS_PER_LONG - len); +} + +/** + * nft_pipapo_avx2_refill() - Scan bitmap, select mapping table item, set bits + * @offset: Start from given bitmap (equivalent to bucket) offset, in longs + * @map: Bitmap to be scanned for set bits + * @dst: Destination bitmap + * @mt: Mapping table containing bit set specifiers + * @len: Length of bitmap in longs + * @last: Return index of first set bit, if this is the last field + * + * This is an alternative implementation of pipapo_refill() suitable for usage + * with AVX2 lookup routines: we know there are four words to be scanned, at + * a given offset inside the map, for each matching iteration. + * + * This function doesn't actually use any AVX2 instruction. + * + * Return: first set bit index if @last, index of first filled word otherwise. + */ +static int nft_pipapo_avx2_refill(int offset, unsigned long *map, + unsigned long *dst, + union nft_pipapo_map_bucket *mt, bool last) +{ + int ret = -1; + +#define NFT_PIPAPO_AVX2_REFILL_ONE_WORD(x) \ + do { \ + while (map[(x)]) { \ + int r = __builtin_ctzl(map[(x)]); \ + int i = (offset + (x)) * BITS_PER_LONG + r; \ + \ + if (last) \ + return i; \ + \ + nft_pipapo_avx2_fill(dst, mt[i].to, mt[i].n); \ + \ + if (ret == -1) \ + ret = mt[i].to; \ + \ + map[(x)] &= ~(1UL << r); \ + } \ + } while (0) + + NFT_PIPAPO_AVX2_REFILL_ONE_WORD(0); + NFT_PIPAPO_AVX2_REFILL_ONE_WORD(1); + NFT_PIPAPO_AVX2_REFILL_ONE_WORD(2); + NFT_PIPAPO_AVX2_REFILL_ONE_WORD(3); +#undef NFT_PIPAPO_AVX2_REFILL_ONE_WORD + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_4b_2() - AVX2-based lookup for 2 four-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * Load buckets from lookup table corresponding to the values of each 4-bit + * group of packet bytes, and perform a bitwise intersection between them. If + * this is the first field in the set, simply AND the buckets together + * (equivalent to using an all-ones starting bitmap), use the provided starting + * bitmap otherwise. Then call nft_pipapo_avx2_refill() to generate the next + * working bitmap, @fill. + * + * This is used for 8-bit fields (i.e. protocol numbers). + * + * Out-of-order (and superscalar) execution is vital here, so it's critical to + * avoid false data dependencies. CPU and compiler could (mostly) take care of + * this on their own, but the operation ordering is explicitly given here with + * a likely execution order in mind, to highlight possible stalls. That's why + * a number of logically distinct operations (i.e. loading buckets, intersecting + * buckets) are interleaved. + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + u8 pg[2] = { pkt[0] >> 4, pkt[0] & 0xf }; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_LOAD(2, map[i_ul]); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nothing); + NFT_PIPAPO_AVX2_AND(3, 0, 1); + NFT_PIPAPO_AVX2_AND(4, 2, 3); + } + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 4); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_4b_4() - AVX2-based lookup for 4 four-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 16-bit fields (i.e. ports). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + u8 pg[4] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf }; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 2, pg[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 3, pg[3], bsize); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + NFT_PIPAPO_AVX2_AND(5, 2, 3); + NFT_PIPAPO_AVX2_AND(7, 4, 5); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + + NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]); + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize); + NFT_PIPAPO_AVX2_AND(5, 0, 1); + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing); + + NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_AND(7, 4, 5); + /* Stall */ + NFT_PIPAPO_AVX2_AND(7, 6, 7); + } + + /* Stall */ + NFT_PIPAPO_AVX2_NOMATCH_GOTO(7, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 7); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_4b_8() - AVX2-based lookup for 8 four-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 32-bit fields (i.e. IPv4 addresses). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + u8 pg[8] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, + pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, + }; + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 2, pg[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 3, pg[3], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 4, pg[4], bsize); + NFT_PIPAPO_AVX2_AND(5, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 5, pg[5], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 6, pg[6], bsize); + NFT_PIPAPO_AVX2_AND(8, 2, 3); + NFT_PIPAPO_AVX2_AND(9, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 7, pg[7], bsize); + NFT_PIPAPO_AVX2_AND(11, 6, 7); + NFT_PIPAPO_AVX2_AND(12, 8, 9); + NFT_PIPAPO_AVX2_AND(13, 10, 11); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(1, 12, 13); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize); + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing); + + NFT_PIPAPO_AVX2_AND(5, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 4, pg[4], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize); + NFT_PIPAPO_AVX2_AND(8, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt, 6, pg[6], bsize); + NFT_PIPAPO_AVX2_AND(10, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt, 7, pg[7], bsize); + NFT_PIPAPO_AVX2_AND(12, 6, 7); + NFT_PIPAPO_AVX2_AND(13, 8, 9); + NFT_PIPAPO_AVX2_AND(14, 10, 11); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(1, 12, 13); + NFT_PIPAPO_AVX2_AND(1, 1, 14); + } + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 1); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; + +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_4b_12() - AVX2-based lookup for 12 four-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 48-bit fields (i.e. MAC addresses/EUI-48). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + u8 pg[12] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, + pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, + pkt[4] >> 4, pkt[4] & 0xf, pkt[5] >> 4, pkt[5] & 0xf, + }; + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (!first) + NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]); + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize); + + if (!first) { + NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing); + NFT_PIPAPO_AVX2_AND(1, 1, 0); + } + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 4, pg[4], bsize); + NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 6, pg[6], bsize); + NFT_PIPAPO_AVX2_AND(9, 1, 4); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 7, pg[7], bsize); + NFT_PIPAPO_AVX2_AND(11, 5, 6); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 8, pg[8], bsize); + NFT_PIPAPO_AVX2_AND(13, 7, 8); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 9, pg[9], bsize); + + NFT_PIPAPO_AVX2_AND(0, 9, 10); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 10, pg[10], bsize); + NFT_PIPAPO_AVX2_AND(2, 11, 12); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 11, pg[11], bsize); + NFT_PIPAPO_AVX2_AND(4, 13, 14); + NFT_PIPAPO_AVX2_AND(5, 0, 1); + + NFT_PIPAPO_AVX2_AND(6, 2, 3); + + /* Stalls */ + NFT_PIPAPO_AVX2_AND(7, 4, 5); + NFT_PIPAPO_AVX2_AND(8, 6, 7); + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(8, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 8); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_4b_32() - AVX2-based lookup for 32 four-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 128-bit fields (i.e. IPv6 addresses). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + u8 pg[32] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, + pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, + pkt[4] >> 4, pkt[4] & 0xf, pkt[5] >> 4, pkt[5] & 0xf, + pkt[6] >> 4, pkt[6] & 0xf, pkt[7] >> 4, pkt[7] & 0xf, + pkt[8] >> 4, pkt[8] & 0xf, pkt[9] >> 4, pkt[9] & 0xf, + pkt[10] >> 4, pkt[10] & 0xf, pkt[11] >> 4, pkt[11] & 0xf, + pkt[12] >> 4, pkt[12] & 0xf, pkt[13] >> 4, pkt[13] & 0xf, + pkt[14] >> 4, pkt[14] & 0xf, pkt[15] >> 4, pkt[15] & 0xf, + }; + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (!first) + NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]); + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize); + if (!first) { + NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing); + NFT_PIPAPO_AVX2_AND(1, 1, 0); + } + + NFT_PIPAPO_AVX2_AND(5, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 4, pg[4], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize); + NFT_PIPAPO_AVX2_AND(8, 1, 4); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt, 6, pg[6], bsize); + NFT_PIPAPO_AVX2_AND(10, 5, 6); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt, 7, pg[7], bsize); + NFT_PIPAPO_AVX2_AND(12, 7, 8); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(13, lt, 8, pg[8], bsize); + NFT_PIPAPO_AVX2_AND(14, 9, 10); + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 9, pg[9], bsize); + NFT_PIPAPO_AVX2_AND(1, 11, 12); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 10, pg[10], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 11, pg[11], bsize); + NFT_PIPAPO_AVX2_AND(4, 13, 14); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 12, pg[12], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 13, pg[13], bsize); + NFT_PIPAPO_AVX2_AND(7, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 14, pg[14], bsize); + NFT_PIPAPO_AVX2_AND(9, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 15, pg[15], bsize); + NFT_PIPAPO_AVX2_AND(11, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 16, pg[16], bsize); + NFT_PIPAPO_AVX2_AND(13, 6, 7); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 17, pg[17], bsize); + + NFT_PIPAPO_AVX2_AND(0, 8, 9); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 18, pg[18], bsize); + NFT_PIPAPO_AVX2_AND(2, 10, 11); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 19, pg[19], bsize); + NFT_PIPAPO_AVX2_AND(4, 12, 13); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 20, pg[20], bsize); + NFT_PIPAPO_AVX2_AND(6, 14, 0); + NFT_PIPAPO_AVX2_AND(7, 1, 2); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 21, pg[21], bsize); + NFT_PIPAPO_AVX2_AND(9, 3, 4); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 22, pg[22], bsize); + NFT_PIPAPO_AVX2_AND(11, 5, 6); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 23, pg[23], bsize); + NFT_PIPAPO_AVX2_AND(13, 7, 8); + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 24, pg[24], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 25, pg[25], bsize); + NFT_PIPAPO_AVX2_AND(1, 9, 10); + NFT_PIPAPO_AVX2_AND(2, 11, 12); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 26, pg[26], bsize); + NFT_PIPAPO_AVX2_AND(4, 13, 14); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 27, pg[27], bsize); + NFT_PIPAPO_AVX2_AND(6, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 28, pg[28], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 29, pg[29], bsize); + NFT_PIPAPO_AVX2_AND(9, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 30, pg[30], bsize); + NFT_PIPAPO_AVX2_AND(11, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 31, pg[31], bsize); + + NFT_PIPAPO_AVX2_AND(0, 6, 7); + NFT_PIPAPO_AVX2_AND(1, 8, 9); + NFT_PIPAPO_AVX2_AND(2, 10, 11); + NFT_PIPAPO_AVX2_AND(3, 12, 0); + + /* Stalls */ + NFT_PIPAPO_AVX2_AND(4, 1, 2); + NFT_PIPAPO_AVX2_AND(5, 3, 4); + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(5, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 5); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_8b_1() - AVX2-based lookup for one eight-bit group + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 8-bit fields (i.e. protocol numbers). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 0, pkt[0], bsize); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]); + NFT_PIPAPO_AVX2_AND(2, 0, 1); + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing); + } + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 2); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_8b_2() - AVX2-based lookup for 2 eight-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 16-bit fields (i.e. ports). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + } else { + NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(3, 0, 1); + NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing); + NFT_PIPAPO_AVX2_AND(4, 3, 2); + } + + /* Stall */ + NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 4); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_8b_4() - AVX2-based lookup for 4 eight-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 32-bit fields (i.e. IPv4 addresses). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 2, pkt[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 3, pkt[3], bsize); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(4, 0, 1); + NFT_PIPAPO_AVX2_AND(5, 2, 3); + NFT_PIPAPO_AVX2_AND(0, 4, 5); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize); + + NFT_PIPAPO_AVX2_AND(5, 0, 1); + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing); + NFT_PIPAPO_AVX2_AND(6, 2, 3); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(7, 4, 5); + NFT_PIPAPO_AVX2_AND(0, 6, 7); + } + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 0); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; + +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_8b_6() - AVX2-based lookup for 6 eight-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 48-bit fields (i.e. MAC addresses/EUI-48). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 2, pkt[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 3, pkt[3], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize); + + NFT_PIPAPO_AVX2_AND(5, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 6, pkt[5], bsize); + NFT_PIPAPO_AVX2_AND(7, 2, 3); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(0, 4, 5); + NFT_PIPAPO_AVX2_AND(1, 6, 7); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize); + + NFT_PIPAPO_AVX2_AND(5, 0, 1); + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing); + + NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 4, pkt[4], bsize); + NFT_PIPAPO_AVX2_AND(0, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 5, pkt[5], bsize); + NFT_PIPAPO_AVX2_AND(2, 6, 7); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(3, 0, 1); + NFT_PIPAPO_AVX2_AND(4, 2, 3); + } + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 4); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; + +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_8b_16() - AVX2-based lookup for 16 eight-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 128-bit fields (i.e. IPv6 addresses). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (!first) + NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]); + + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize); + if (!first) { + NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing); + NFT_PIPAPO_AVX2_AND(1, 1, 0); + } + NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize); + + NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 4, pkt[4], bsize); + NFT_PIPAPO_AVX2_AND(6, 1, 2); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 5, pkt[5], bsize); + NFT_PIPAPO_AVX2_AND(0, 3, 4); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 6, pkt[6], bsize); + + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 7, pkt[7], bsize); + NFT_PIPAPO_AVX2_AND(3, 5, 6); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 8, pkt[8], bsize); + + NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 9, pkt[9], bsize); + NFT_PIPAPO_AVX2_AND(0, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 10, pkt[10], bsize); + NFT_PIPAPO_AVX2_AND(2, 6, 7); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 11, pkt[11], bsize); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 12, pkt[12], bsize); + NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 13, pkt[13], bsize); + NFT_PIPAPO_AVX2_AND(0, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 14, pkt[14], bsize); + NFT_PIPAPO_AVX2_AND(2, 6, 7); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 15, pkt[15], bsize); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(5, 2, 3); + NFT_PIPAPO_AVX2_AND(6, 4, 5); + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(6, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 6); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; + +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * This function should never be called, but is provided for the case the field + * size doesn't match any of the known data types. Matching rate is + * substantially lower than AVX2 routines. + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + unsigned long *lt = f->lt, bsize = f->bsize; + int i, ret = -1, b; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + + if (first) + memset(map, 0xff, bsize * sizeof(*map)); + + for (i = offset; i < bsize; i++) { + if (f->bb == 8) + pipapo_and_field_buckets_8bit(f, map, pkt); + else + pipapo_and_field_buckets_4bit(f, map, pkt); + NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; + + b = pipapo_refill(map, bsize, f->rules, fill, f->mt, last); + + if (last) + return b; + + if (ret == -1) + ret = b / XSAVE_YMM_SIZE; + } + + return ret; +} + +/** + * nft_pipapo_avx2_estimate() - Set size, space and lookup complexity + * @desc: Set description, element count and field description used + * @features: Flags: NFT_SET_INTERVAL needs to be there + * @est: Storage for estimation data + * + * Return: true if set is compatible and AVX2 available, false otherwise. + */ +bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + if (!(features & NFT_SET_INTERVAL) || + desc->field_count < NFT_PIPAPO_MIN_FIELDS) + return false; + + if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_AVX)) + return false; + + est->size = pipapo_estimate_size(desc); + if (!est->size) + return false; + + est->lookup = NFT_SET_CLASS_O_LOG_N; + + est->space = NFT_SET_CLASS_O_N; + + return true; +} + +/** + * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation + * @net: Network namespace + * @set: nftables API set representation + * @elem: nftables API element representation containing key data + * @ext: nftables API extension pointer, filled with matching reference + * + * For more details, see DOC: Theory of Operation in nft_set_pipapo.c. + * + * This implementation exploits the repetitive characteristic of the algorithm + * to provide a fast, vectorised version using the AVX2 SIMD instruction set. + * + * Return: true on match, false otherwise. + */ +bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + struct nft_pipapo *priv = nft_set_priv(set); + unsigned long *res, *fill, *scratch; + u8 genmask = nft_genmask_cur(net); + const u8 *rp = (const u8 *)key; + struct nft_pipapo_match *m; + struct nft_pipapo_field *f; + bool map_index; + int i, ret = 0; + + m = rcu_dereference(priv->match); + + /* This also protects access to all data related to scratch maps */ + kernel_fpu_begin(); + + scratch = *raw_cpu_ptr(m->scratch_aligned); + if (unlikely(!scratch)) { + kernel_fpu_end(); + return false; + } + map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index); + + res = scratch + (map_index ? m->bsize_max : 0); + fill = scratch + (map_index ? 0 : m->bsize_max); + + /* Starting map doesn't need to be set for this implementation */ + + nft_pipapo_avx2_prepare(); + +next_match: + nft_pipapo_for_each_field(f, i, m) { + bool last = i == m->field_count - 1, first = !i; + +#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \ + (ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \ + ret, rp, \ + first, last)) + + if (likely(f->bb == 8)) { + if (f->groups == 1) { + NFT_SET_PIPAPO_AVX2_LOOKUP(8, 1); + } else if (f->groups == 2) { + NFT_SET_PIPAPO_AVX2_LOOKUP(8, 2); + } else if (f->groups == 4) { + NFT_SET_PIPAPO_AVX2_LOOKUP(8, 4); + } else if (f->groups == 6) { + NFT_SET_PIPAPO_AVX2_LOOKUP(8, 6); + } else if (f->groups == 16) { + NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16); + } else { + ret = nft_pipapo_avx2_lookup_slow(res, fill, f, + ret, rp, + first, last); + } + } else { + if (f->groups == 2) { + NFT_SET_PIPAPO_AVX2_LOOKUP(4, 2); + } else if (f->groups == 4) { + NFT_SET_PIPAPO_AVX2_LOOKUP(4, 4); + } else if (f->groups == 8) { + NFT_SET_PIPAPO_AVX2_LOOKUP(4, 8); + } else if (f->groups == 12) { + NFT_SET_PIPAPO_AVX2_LOOKUP(4, 12); + } else if (f->groups == 32) { + NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32); + } else { + ret = nft_pipapo_avx2_lookup_slow(res, fill, f, + ret, rp, + first, last); + } + } + NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; + +#undef NFT_SET_PIPAPO_AVX2_LOOKUP + + if (ret < 0) + goto out; + + if (last) { + *ext = &f->mt[ret].e->ext; + if (unlikely(nft_set_elem_expired(*ext) || + !nft_set_elem_active(*ext, genmask))) { + ret = 0; + goto next_match; + } + + goto out; + } + + swap(res, fill); + rp += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + } + +out: + if (i % 2) + raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index); + kernel_fpu_end(); + + return ret >= 0; +} diff --git a/net/netfilter/nft_set_pipapo_avx2.h b/net/netfilter/nft_set_pipapo_avx2.h new file mode 100644 index 000000000000..396caf7bfca8 --- /dev/null +++ b/net/netfilter/nft_set_pipapo_avx2.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _NFT_SET_PIPAPO_AVX2_H + +#ifdef CONFIG_AS_AVX2 +#include <asm/fpu/xstate.h> +#define NFT_PIPAPO_ALIGN (XSAVE_YMM_SIZE / BITS_PER_BYTE) + +bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext); +bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est); +#endif /* CONFIG_AS_AVX2 */ + +#endif /* _NFT_SET_PIPAPO_AVX2_H */ diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 5000b938ab1e..172ef8189f99 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -481,8 +481,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, return true; } -struct nft_set_type nft_set_rbtree_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_rbtree_type = { .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .ops = { .privsize = nft_rbtree_privsize, diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 4c3f2e24c7cb..30be5787fbde 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -11,6 +11,7 @@ #include <net/ip_tunnels.h> #include <net/vxlan.h> #include <net/erspan.h> +#include <net/geneve.h> struct nft_tunnel { enum nft_tunnel_keys key:8; @@ -144,6 +145,7 @@ struct nft_tunnel_opts { union { struct vxlan_metadata vxlan; struct erspan_metadata erspan; + u8 data[IP_TUNNEL_OPTS_MAX]; } u; u32 len; __be16 flags; @@ -301,9 +303,53 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr, return 0; } +static const struct nla_policy nft_tunnel_opts_geneve_policy[NFTA_TUNNEL_KEY_GENEVE_MAX + 1] = { + [NFTA_TUNNEL_KEY_GENEVE_CLASS] = { .type = NLA_U16 }, + [NFTA_TUNNEL_KEY_GENEVE_TYPE] = { .type = NLA_U8 }, + [NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 }, +}; + +static int nft_tunnel_obj_geneve_init(const struct nlattr *attr, + struct nft_tunnel_opts *opts) +{ + struct geneve_opt *opt = (struct geneve_opt *)opts->u.data + opts->len; + struct nlattr *tb[NFTA_TUNNEL_KEY_GENEVE_MAX + 1]; + int err, data_len; + + err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_GENEVE_MAX, attr, + nft_tunnel_opts_geneve_policy, NULL); + if (err < 0) + return err; + + if (!tb[NFTA_TUNNEL_KEY_GENEVE_CLASS] || + !tb[NFTA_TUNNEL_KEY_GENEVE_TYPE] || + !tb[NFTA_TUNNEL_KEY_GENEVE_DATA]) + return -EINVAL; + + attr = tb[NFTA_TUNNEL_KEY_GENEVE_DATA]; + data_len = nla_len(attr); + if (data_len % 4) + return -EINVAL; + + opts->len += sizeof(*opt) + data_len; + if (opts->len > IP_TUNNEL_OPTS_MAX) + return -EINVAL; + + memcpy(opt->opt_data, nla_data(attr), data_len); + opt->length = data_len / 4; + opt->opt_class = nla_get_be16(tb[NFTA_TUNNEL_KEY_GENEVE_CLASS]); + opt->type = nla_get_u8(tb[NFTA_TUNNEL_KEY_GENEVE_TYPE]); + opts->flags = TUNNEL_GENEVE_OPT; + + return 0; +} + static const struct nla_policy nft_tunnel_opts_policy[NFTA_TUNNEL_KEY_OPTS_MAX + 1] = { + [NFTA_TUNNEL_KEY_OPTS_UNSPEC] = { + .strict_start_type = NFTA_TUNNEL_KEY_OPTS_GENEVE }, [NFTA_TUNNEL_KEY_OPTS_VXLAN] = { .type = NLA_NESTED, }, [NFTA_TUNNEL_KEY_OPTS_ERSPAN] = { .type = NLA_NESTED, }, + [NFTA_TUNNEL_KEY_OPTS_GENEVE] = { .type = NLA_NESTED, }, }; static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, @@ -311,22 +357,43 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, struct ip_tunnel_info *info, struct nft_tunnel_opts *opts) { - struct nlattr *tb[NFTA_TUNNEL_KEY_OPTS_MAX + 1]; - int err; + int err, rem, type = 0; + struct nlattr *nla; - err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_OPTS_MAX, attr, - nft_tunnel_opts_policy, NULL); + err = nla_validate_nested_deprecated(attr, NFTA_TUNNEL_KEY_OPTS_MAX, + nft_tunnel_opts_policy, NULL); if (err < 0) return err; - if (tb[NFTA_TUNNEL_KEY_OPTS_VXLAN]) { - err = nft_tunnel_obj_vxlan_init(tb[NFTA_TUNNEL_KEY_OPTS_VXLAN], - opts); - } else if (tb[NFTA_TUNNEL_KEY_OPTS_ERSPAN]) { - err = nft_tunnel_obj_erspan_init(tb[NFTA_TUNNEL_KEY_OPTS_ERSPAN], - opts); - } else { - return -EOPNOTSUPP; + nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) { + switch (nla_type(nla)) { + case NFTA_TUNNEL_KEY_OPTS_VXLAN: + if (type) + return -EINVAL; + err = nft_tunnel_obj_vxlan_init(nla, opts); + if (err) + return err; + type = TUNNEL_VXLAN_OPT; + break; + case NFTA_TUNNEL_KEY_OPTS_ERSPAN: + if (type) + return -EINVAL; + err = nft_tunnel_obj_erspan_init(nla, opts); + if (err) + return err; + type = TUNNEL_ERSPAN_OPT; + break; + case NFTA_TUNNEL_KEY_OPTS_GENEVE: + if (type && type != TUNNEL_GENEVE_OPT) + return -EINVAL; + err = nft_tunnel_obj_geneve_init(nla, opts); + if (err) + return err; + type = TUNNEL_GENEVE_OPT; + break; + default: + return -EOPNOTSUPP; + } } return err; @@ -339,6 +406,8 @@ static const struct nla_policy nft_tunnel_key_policy[NFTA_TUNNEL_KEY_MAX + 1] = [NFTA_TUNNEL_KEY_FLAGS] = { .type = NLA_U32, }, [NFTA_TUNNEL_KEY_TOS] = { .type = NLA_U8, }, [NFTA_TUNNEL_KEY_TTL] = { .type = NLA_U8, }, + [NFTA_TUNNEL_KEY_SPORT] = { .type = NLA_U16, }, + [NFTA_TUNNEL_KEY_DPORT] = { .type = NLA_U16, }, [NFTA_TUNNEL_KEY_OPTS] = { .type = NLA_NESTED, }, }; @@ -516,6 +585,25 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, break; } nla_nest_end(skb, inner); + } else if (opts->flags & TUNNEL_GENEVE_OPT) { + struct geneve_opt *opt; + int offset = 0; + + inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE); + if (!inner) + goto failure; + while (opts->len > offset) { + opt = (struct geneve_opt *)opts->u.data + offset; + if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS, + opt->opt_class) || + nla_put_u8(skb, NFTA_TUNNEL_KEY_GENEVE_TYPE, + opt->type) || + nla_put(skb, NFTA_TUNNEL_KEY_GENEVE_DATA, + opt->length * 4, opt->opt_data)) + goto inner_failure; + offset += sizeof(*opt) + opt->length * 4; + } + nla_nest_end(skb, inner); } nla_nest_end(skb, nest); return 0; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e27c6c5ba9df..cd2b034eef59 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1551,6 +1551,9 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos, uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file)); struct nf_mttg_trav *trav = seq->private; + if (ppos != NULL) + ++(*ppos); + switch (trav->class) { case MTTG_TRAV_INIT: trav->class = MTTG_TRAV_NFP_UNSPEC; @@ -1576,9 +1579,6 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos, default: return NULL; } - - if (ppos != NULL) - ++*ppos; return trav; } diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index f56d3ed93e56..75bd0e5dd312 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/timer.h> +#include <linux/alarmtimer.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/netfilter.h> @@ -30,6 +31,7 @@ struct idletimer_tg { struct list_head entry; + struct alarm alarm; struct timer_list timer; struct work_struct work; @@ -37,6 +39,7 @@ struct idletimer_tg { struct device_attribute attr; unsigned int refcnt; + u8 timer_type; }; static LIST_HEAD(idletimer_tg_list); @@ -62,20 +65,29 @@ static ssize_t idletimer_tg_show(struct device *dev, { struct idletimer_tg *timer; unsigned long expires = 0; + struct timespec64 ktimespec = {}; + long time_diff = 0; mutex_lock(&list_mutex); timer = __idletimer_tg_find_by_label(attr->attr.name); - if (timer) - expires = timer->timer.expires; + if (timer) { + if (timer->timer_type & XT_IDLETIMER_ALARM) { + ktime_t expires_alarm = alarm_expires_remaining(&timer->alarm); + ktimespec = ktime_to_timespec64(expires_alarm); + time_diff = ktimespec.tv_sec; + } else { + expires = timer->timer.expires; + time_diff = jiffies_to_msecs(expires - jiffies) / 1000; + } + } mutex_unlock(&list_mutex); - if (time_after(expires, jiffies)) - return sprintf(buf, "%u\n", - jiffies_to_msecs(expires - jiffies) / 1000); + if (time_after(expires, jiffies) || ktimespec.tv_sec > 0) + return snprintf(buf, PAGE_SIZE, "%ld\n", time_diff); - return sprintf(buf, "0\n"); + return snprintf(buf, PAGE_SIZE, "0\n"); } static void idletimer_tg_work(struct work_struct *work) @@ -95,6 +107,16 @@ static void idletimer_tg_expired(struct timer_list *t) schedule_work(&timer->work); } +static enum alarmtimer_restart idletimer_tg_alarmproc(struct alarm *alarm, + ktime_t now) +{ + struct idletimer_tg *timer = alarm->data; + + pr_debug("alarm %s expired\n", timer->attr.attr.name); + schedule_work(&timer->work); + return ALARMTIMER_NORESTART; +} + static int idletimer_check_sysfs_name(const char *name, unsigned int size) { int ret; @@ -160,6 +182,68 @@ out: return ret; } +static int idletimer_tg_create_v1(struct idletimer_tg_info_v1 *info) +{ + int ret; + + info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL); + if (!info->timer) { + ret = -ENOMEM; + goto out; + } + + ret = idletimer_check_sysfs_name(info->label, sizeof(info->label)); + if (ret < 0) + goto out_free_timer; + + sysfs_attr_init(&info->timer->attr.attr); + info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); + if (!info->timer->attr.attr.name) { + ret = -ENOMEM; + goto out_free_timer; + } + info->timer->attr.attr.mode = 0444; + info->timer->attr.show = idletimer_tg_show; + + ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr); + if (ret < 0) { + pr_debug("couldn't add file to sysfs"); + goto out_free_attr; + } + + /* notify userspace */ + kobject_uevent(idletimer_tg_kobj,KOBJ_ADD); + + list_add(&info->timer->entry, &idletimer_tg_list); + pr_debug("timer type value is %u", info->timer_type); + info->timer->timer_type = info->timer_type; + info->timer->refcnt = 1; + + INIT_WORK(&info->timer->work, idletimer_tg_work); + + if (info->timer->timer_type & XT_IDLETIMER_ALARM) { + ktime_t tout; + alarm_init(&info->timer->alarm, ALARM_BOOTTIME, + idletimer_tg_alarmproc); + info->timer->alarm.data = info->timer; + tout = ktime_set(info->timeout, 0); + alarm_start_relative(&info->timer->alarm, tout); + } else { + timer_setup(&info->timer->timer, idletimer_tg_expired, 0); + mod_timer(&info->timer->timer, + msecs_to_jiffies(info->timeout * 1000) + jiffies); + } + + return 0; + +out_free_attr: + kfree(info->timer->attr.attr.name); +out_free_timer: + kfree(info->timer); +out: + return ret; +} + /* * The actual xt_tables plugin. */ @@ -177,13 +261,30 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb, return XT_CONTINUE; } -static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) +/* + * The actual xt_tables plugin. + */ +static unsigned int idletimer_tg_target_v1(struct sk_buff *skb, + const struct xt_action_param *par) { - struct idletimer_tg_info *info = par->targinfo; - int ret; + const struct idletimer_tg_info_v1 *info = par->targinfo; - pr_debug("checkentry targinfo%s\n", info->label); + pr_debug("resetting timer %s, timeout period %u\n", + info->label, info->timeout); + + if (info->timer->timer_type & XT_IDLETIMER_ALARM) { + ktime_t tout = ktime_set(info->timeout, 0); + alarm_start_relative(&info->timer->alarm, tout); + } else { + mod_timer(&info->timer->timer, + msecs_to_jiffies(info->timeout * 1000) + jiffies); + } + return XT_CONTINUE; +} + +static int idletimer_tg_helper(struct idletimer_tg_info *info) +{ if (info->timeout == 0) { pr_debug("timeout value is zero\n"); return -EINVAL; @@ -198,7 +299,23 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) pr_debug("label is empty or not nul-terminated\n"); return -EINVAL; } + return 0; +} + +static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) +{ + struct idletimer_tg_info *info = par->targinfo; + int ret; + + pr_debug("checkentry targinfo%s\n", info->label); + + ret = idletimer_tg_helper(info); + if(ret < 0) + { + pr_debug("checkentry helper return invalid\n"); + return -EINVAL; + } mutex_lock(&list_mutex); info->timer = __idletimer_tg_find_by_label(info->label); @@ -222,6 +339,65 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) return 0; } +static int idletimer_tg_checkentry_v1(const struct xt_tgchk_param *par) +{ + struct idletimer_tg_info_v1 *info = par->targinfo; + int ret; + + pr_debug("checkentry targinfo%s\n", info->label); + + ret = idletimer_tg_helper((struct idletimer_tg_info *)info); + if(ret < 0) + { + pr_debug("checkentry helper return invalid\n"); + return -EINVAL; + } + + if (info->timer_type > XT_IDLETIMER_ALARM) { + pr_debug("invalid value for timer type\n"); + return -EINVAL; + } + + mutex_lock(&list_mutex); + + info->timer = __idletimer_tg_find_by_label(info->label); + if (info->timer) { + if (info->timer->timer_type != info->timer_type) { + pr_debug("Adding/Replacing rule with same label and different timer type is not allowed\n"); + mutex_unlock(&list_mutex); + return -EINVAL; + } + + info->timer->refcnt++; + if (info->timer_type & XT_IDLETIMER_ALARM) { + /* calculate remaining expiry time */ + ktime_t tout = alarm_expires_remaining(&info->timer->alarm); + struct timespec64 ktimespec = ktime_to_timespec64(tout); + + if (ktimespec.tv_sec > 0) { + pr_debug("time_expiry_remaining %lld\n", + ktimespec.tv_sec); + alarm_start_relative(&info->timer->alarm, tout); + } + } else { + mod_timer(&info->timer->timer, + msecs_to_jiffies(info->timeout * 1000) + jiffies); + } + pr_debug("increased refcnt of timer %s to %u\n", + info->label, info->timer->refcnt); + } else { + ret = idletimer_tg_create_v1(info); + if (ret < 0) { + pr_debug("failed to create timer\n"); + mutex_unlock(&list_mutex); + return ret; + } + } + + mutex_unlock(&list_mutex); + return 0; +} + static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) { const struct idletimer_tg_info *info = par->targinfo; @@ -247,7 +423,38 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) mutex_unlock(&list_mutex); } -static struct xt_target idletimer_tg __read_mostly = { +static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par) +{ + const struct idletimer_tg_info_v1 *info = par->targinfo; + + pr_debug("destroy targinfo %s\n", info->label); + + mutex_lock(&list_mutex); + + if (--info->timer->refcnt == 0) { + pr_debug("deleting timer %s\n", info->label); + + list_del(&info->timer->entry); + if (info->timer->timer_type & XT_IDLETIMER_ALARM) { + alarm_cancel(&info->timer->alarm); + } else { + del_timer_sync(&info->timer->timer); + } + cancel_work_sync(&info->timer->work); + sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); + kfree(info->timer->attr.attr.name); + kfree(info->timer); + } else { + pr_debug("decreased refcnt of timer %s to %u\n", + info->label, info->timer->refcnt); + } + + mutex_unlock(&list_mutex); +} + + +static struct xt_target idletimer_tg[] __read_mostly = { + { .name = "IDLETIMER", .family = NFPROTO_UNSPEC, .target = idletimer_tg_target, @@ -256,6 +463,20 @@ static struct xt_target idletimer_tg __read_mostly = { .checkentry = idletimer_tg_checkentry, .destroy = idletimer_tg_destroy, .me = THIS_MODULE, + }, + { + .name = "IDLETIMER", + .family = NFPROTO_UNSPEC, + .revision = 1, + .target = idletimer_tg_target_v1, + .targetsize = sizeof(struct idletimer_tg_info_v1), + .usersize = offsetof(struct idletimer_tg_info_v1, timer), + .checkentry = idletimer_tg_checkentry_v1, + .destroy = idletimer_tg_destroy_v1, + .me = THIS_MODULE, + }, + + }; static struct class *idletimer_tg_class; @@ -283,7 +504,8 @@ static int __init idletimer_tg_init(void) idletimer_tg_kobj = &idletimer_tg_device->kobj; - err = xt_register_target(&idletimer_tg); + err = xt_register_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg)); + if (err < 0) { pr_debug("couldn't register xt target\n"); goto out_dev; @@ -300,7 +522,7 @@ out: static void __exit idletimer_tg_exit(void) { - xt_unregister_target(&idletimer_tg); + xt_unregister_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg)); device_destroy(idletimer_tg_class, MKDEV(0, 0)); class_destroy(idletimer_tg_class); diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 2317721f3ecb..75625d13e976 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -21,8 +21,6 @@ MODULE_DESCRIPTION("Xtables: packet security mark modification"); MODULE_ALIAS("ipt_SECMARK"); MODULE_ALIAS("ip6t_SECMARK"); -#define PFX "SECMARK: " - static u8 mode; static unsigned int diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 8c835ad63729..9c5cfd74a0ee 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -132,7 +132,7 @@ struct xt_hashlimit_htable { const char *name; struct net *net; - struct hlist_head hash[0]; /* hashtable itself */ + struct hlist_head hash[]; /* hashtable itself */ }; static int diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 0a9708004e20..19bef176145e 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -71,7 +71,7 @@ struct recent_entry { u_int8_t ttl; u_int8_t index; u_int16_t nstamps; - unsigned long stamps[0]; + unsigned long stamps[]; }; struct recent_table { @@ -82,7 +82,7 @@ struct recent_table { unsigned int entries; u8 nstamps_max_mask; struct list_head lru_list; - struct list_head iphash[0]; + struct list_head iphash[]; }; struct recent_net { @@ -492,12 +492,12 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) const struct recent_entry *e = v; const struct list_head *head = e->list.next; + (*pos)++; while (head == &t->iphash[st->bucket]) { if (++st->bucket >= ip_list_hash_size) return NULL; head = t->iphash[st->bucket].next; } - (*pos)++; return list_entry(head, struct recent_entry, list); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 19df49a6ad15..ed77c75bf63f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2434,7 +2434,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, in_skb->len)) WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, (u8 *)extack->bad_attr - - in_skb->data)); + (u8 *)nlh)); } else { if (extack->cookie_len) WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 6f1b096e601c..43811b5219b5 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -181,13 +181,20 @@ exit: void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { - u8 gate = hdev->pipes[pipe].gate; u8 status = NFC_HCI_ANY_OK; struct hci_create_pipe_resp *create_info; struct hci_delete_pipe_noti *delete_info; struct hci_all_pipe_cleared_noti *cleared_info; + u8 gate; - pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); + pr_debug("from pipe %x cmd %x\n", pipe, cmd); + + if (pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + + gate = hdev->pipes[pipe].gate; switch (cmd) { case NFC_HCI_ADM_NOTIFY_PIPE_CREATED: @@ -375,8 +382,14 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { int r = 0; - u8 gate = hdev->pipes[pipe].gate; + u8 gate; + + if (pipe >= NFC_HCI_MAX_PIPES) { + pr_err("Discarded event %x to invalid pipe %x\n", event, pipe); + goto exit; + } + gate = hdev->pipes[pipe].gate; if (gate == NFC_HCI_INVALID_GATE) { pr_err("Discarded event %x to unopened pipe %x\n", event, pipe); goto exit; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index eee0dddb7749..e894254c17d4 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -32,6 +32,7 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING, .len = NFC_DEVICE_NAME_MAXSIZE }, [NFC_ATTR_PROTOCOLS] = { .type = NLA_U32 }, + [NFC_ATTR_TARGET_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_COMM_MODE] = { .type = NLA_U8 }, [NFC_ATTR_RF_MODE] = { .type = NLA_U8 }, [NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 }, @@ -43,7 +44,10 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, .len = NFC_FIRMWARE_NAME_MAXSIZE }, + [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_SE_APDU] = { .type = NLA_BINARY }, + [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 }, + [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, }; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index c047afd12116..07a7dd185995 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -645,6 +645,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 }, + [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 }, }; static const struct genl_ops dp_packet_genl_ops[] = { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 30c6879d6774..e5b0986215d2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2274,6 +2274,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto drop_n_account; + + if (do_vnet && + virtio_net_hdr_from_skb(skb, h.raw + macoff - + sizeof(struct virtio_net_hdr), + vio_le(), true, 0)) + goto drop_n_account; + if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* @@ -2286,12 +2293,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, status |= TP_STATUS_LOSING; } - if (do_vnet && - virtio_net_hdr_from_skb(skb, h.raw + macoff - - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) - goto drop_n_account; - po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index aa7b737fed2e..861a831b0ef7 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -185,7 +185,7 @@ static size_t tcf_action_shared_attrs_size(const struct tc_action *act) return nla_total_size(0) /* action number nested */ + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */ + cookie_len /* TCA_ACT_COOKIE */ - + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_HW_STATS_TYPE */ + + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_HW_STATS */ + nla_total_size(0) /* TCA_ACT_STATS nested */ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */ /* TCA_STATS_BASIC */ @@ -789,14 +789,13 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) } rcu_read_unlock(); - if (a->hw_stats_type != TCA_ACT_HW_STATS_TYPE_ANY) { - struct nla_bitfield32 hw_stats_type = { - a->hw_stats_type, - TCA_ACT_HW_STATS_TYPE_ANY, + if (a->hw_stats != TCA_ACT_HW_STATS_ANY) { + struct nla_bitfield32 hw_stats = { + a->hw_stats, + TCA_ACT_HW_STATS_ANY, }; - if (nla_put(skb, TCA_ACT_HW_STATS_TYPE, sizeof(hw_stats_type), - &hw_stats_type)) + if (nla_put(skb, TCA_ACT_HW_STATS, sizeof(hw_stats), &hw_stats)) goto nla_put_failure; } @@ -866,22 +865,22 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) return c; } -static u8 tcf_action_hw_stats_type_get(struct nlattr *hw_stats_type_attr) +static u8 tcf_action_hw_stats_get(struct nlattr *hw_stats_attr) { - struct nla_bitfield32 hw_stats_type_bf; + struct nla_bitfield32 hw_stats_bf; /* If the user did not pass the attr, that means he does * not care about the type. Return "any" in that case * which is setting on all supported types. */ - if (!hw_stats_type_attr) - return TCA_ACT_HW_STATS_TYPE_ANY; - hw_stats_type_bf = nla_get_bitfield32(hw_stats_type_attr); - return hw_stats_type_bf.value; + if (!hw_stats_attr) + return TCA_ACT_HW_STATS_ANY; + hw_stats_bf = nla_get_bitfield32(hw_stats_attr); + return hw_stats_bf.value; } static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS; -static const u32 tca_act_hw_stats_type_allowed = TCA_ACT_HW_STATS_TYPE_ANY; +static const u32 tca_act_hw_stats_allowed = TCA_ACT_HW_STATS_ANY; static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_KIND] = { .type = NLA_STRING }, @@ -891,8 +890,8 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_OPTIONS] = { .type = NLA_NESTED }, [TCA_ACT_FLAGS] = { .type = NLA_BITFIELD32, .validation_data = &tca_act_flags_allowed }, - [TCA_ACT_HW_STATS_TYPE] = { .type = NLA_BITFIELD32, - .validation_data = &tca_act_hw_stats_type_allowed }, + [TCA_ACT_HW_STATS] = { .type = NLA_BITFIELD32, + .validation_data = &tca_act_hw_stats_allowed }, }; struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, @@ -901,8 +900,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { - u8 hw_stats_type = TCA_ACT_HW_STATS_TYPE_ANY; struct nla_bitfield32 flags = { 0, 0 }; + u8 hw_stats = TCA_ACT_HW_STATS_ANY; struct tc_action *a; struct tc_action_ops *a_o; struct tc_cookie *cookie = NULL; @@ -934,8 +933,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, goto err_out; } } - hw_stats_type = - tcf_action_hw_stats_type_get(tb[TCA_ACT_HW_STATS_TYPE]); + hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); if (tb[TCA_ACT_FLAGS]) flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); } else { @@ -987,7 +985,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, tcf_set_action_cookie(&a->act_cookie, cookie); if (!name) - a->hw_stats_type = hw_stats_type; + a->hw_stats = hw_stats; /* module count goes up only when brand new policy is created * if it exists and is only bound to in a_o->init() then diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 3d9e678d7d53..56b66d215a89 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -55,7 +55,219 @@ static const struct rhashtable_params zones_params = { .automatic_shrinking = true, }; +static struct flow_action_entry * +tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action) +{ + int i = flow_action->num_entries++; + + return &flow_action->entries[i]; +} + +static void tcf_ct_add_mangle_action(struct flow_action *action, + enum flow_action_mangle_base htype, + u32 offset, + u32 mask, + u32 val) +{ + struct flow_action_entry *entry; + + entry = tcf_ct_flow_table_flow_action_get_next(action); + entry->id = FLOW_ACTION_MANGLE; + entry->mangle.htype = htype; + entry->mangle.mask = ~mask; + entry->mangle.offset = offset; + entry->mangle.val = val; +} + +/* The following nat helper functions check if the inverted reverse tuple + * (target) is different then the current dir tuple - meaning nat for ports + * and/or ip is needed, and add the relevant mangle actions. + */ +static void +tcf_ct_flow_table_add_action_nat_ipv4(const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple target, + struct flow_action *action) +{ + if (memcmp(&target.src.u3, &tuple->src.u3, sizeof(target.src.u3))) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP4, + offsetof(struct iphdr, saddr), + 0xFFFFFFFF, + be32_to_cpu(target.src.u3.ip)); + if (memcmp(&target.dst.u3, &tuple->dst.u3, sizeof(target.dst.u3))) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP4, + offsetof(struct iphdr, daddr), + 0xFFFFFFFF, + be32_to_cpu(target.dst.u3.ip)); +} + +static void +tcf_ct_add_ipv6_addr_mangle_action(struct flow_action *action, + union nf_inet_addr *addr, + u32 offset) +{ + int i; + + for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP6, + i * sizeof(u32) + offset, + 0xFFFFFFFF, be32_to_cpu(addr->ip6[i])); +} + +static void +tcf_ct_flow_table_add_action_nat_ipv6(const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple target, + struct flow_action *action) +{ + if (memcmp(&target.src.u3, &tuple->src.u3, sizeof(target.src.u3))) + tcf_ct_add_ipv6_addr_mangle_action(action, &target.src.u3, + offsetof(struct ipv6hdr, + saddr)); + if (memcmp(&target.dst.u3, &tuple->dst.u3, sizeof(target.dst.u3))) + tcf_ct_add_ipv6_addr_mangle_action(action, &target.dst.u3, + offsetof(struct ipv6hdr, + daddr)); +} + +static void +tcf_ct_flow_table_add_action_nat_tcp(const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple target, + struct flow_action *action) +{ + __be16 target_src = target.src.u.tcp.port; + __be16 target_dst = target.dst.u.tcp.port; + + if (target_src != tuple->src.u.tcp.port) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + offsetof(struct tcphdr, source), + 0xFFFF, be16_to_cpu(target_src)); + if (target_dst != tuple->dst.u.tcp.port) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + offsetof(struct tcphdr, dest), + 0xFFFF, be16_to_cpu(target_dst)); +} + +static void +tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple target, + struct flow_action *action) +{ + __be16 target_src = target.src.u.udp.port; + __be16 target_dst = target.dst.u.udp.port; + + if (target_src != tuple->src.u.udp.port) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + offsetof(struct udphdr, source), + 0xFFFF, be16_to_cpu(target_src)); + if (target_dst != tuple->dst.u.udp.port) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + offsetof(struct udphdr, dest), + 0xFFFF, be16_to_cpu(target_dst)); +} + +static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, + enum ip_conntrack_dir dir, + struct flow_action *action) +{ + struct nf_conn_labels *ct_labels; + struct flow_action_entry *entry; + enum ip_conntrack_info ctinfo; + u32 *act_ct_labels; + + entry = tcf_ct_flow_table_flow_action_get_next(action); + entry->id = FLOW_ACTION_CT_METADATA; +#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) + entry->ct_metadata.mark = ct->mark; +#endif + ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED : + IP_CT_ESTABLISHED_REPLY; + /* aligns with the CT reference on the SKB nf_ct_set */ + entry->ct_metadata.cookie = (unsigned long)ct | ctinfo; + + act_ct_labels = entry->ct_metadata.labels; + ct_labels = nf_ct_labels_find(ct); + if (ct_labels) + memcpy(act_ct_labels, ct_labels->bits, NF_CT_LABELS_MAX_SIZE); + else + memset(act_ct_labels, 0, NF_CT_LABELS_MAX_SIZE); +} + +static int tcf_ct_flow_table_add_action_nat(struct net *net, + struct nf_conn *ct, + enum ip_conntrack_dir dir, + struct flow_action *action) +{ + const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; + struct nf_conntrack_tuple target; + + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); + + switch (tuple->src.l3num) { + case NFPROTO_IPV4: + tcf_ct_flow_table_add_action_nat_ipv4(tuple, target, + action); + break; + case NFPROTO_IPV6: + tcf_ct_flow_table_add_action_nat_ipv6(tuple, target, + action); + break; + default: + return -EOPNOTSUPP; + } + + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + tcf_ct_flow_table_add_action_nat_tcp(tuple, target, action); + break; + case IPPROTO_UDP: + tcf_ct_flow_table_add_action_nat_udp(tuple, target, action); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int tcf_ct_flow_table_fill_actions(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir tdir, + struct nf_flow_rule *flow_rule) +{ + struct flow_action *action = &flow_rule->rule->action; + int num_entries = action->num_entries; + struct nf_conn *ct = flow->ct; + enum ip_conntrack_dir dir; + int i, err; + + switch (tdir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + dir = IP_CT_DIR_ORIGINAL; + break; + case FLOW_OFFLOAD_DIR_REPLY: + dir = IP_CT_DIR_REPLY; + break; + default: + return -EOPNOTSUPP; + } + + err = tcf_ct_flow_table_add_action_nat(net, ct, dir, action); + if (err) + goto err_nat; + + tcf_ct_flow_table_add_action_meta(ct, dir, action); + return 0; + +err_nat: + /* Clear filled actions */ + for (i = num_entries; i < action->num_entries; i++) + memset(&action->entries[i], 0, sizeof(action->entries[i])); + action->num_entries = num_entries; + + return err; +} + static struct nf_flowtable_type flowtable_ct = { + .action = tcf_ct_flow_table_fill_actions, .owner = THIS_MODULE, }; @@ -80,6 +292,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params) goto err_insert; ct_ft->nf_ft.type = &flowtable_ct; + ct_ft->nf_ft.flags |= NF_FLOWTABLE_HW_OFFLOAD; err = nf_flow_table_init(&ct_ft->nf_ft); if (err) goto err_init; @@ -87,6 +300,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params) __module_get(THIS_MODULE); out_unlock: params->ct_ft = ct_ft; + params->nf_ft = &ct_ft->nf_ft; mutex_unlock(&zones_mutex); return 0; @@ -319,6 +533,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED : IP_CT_ESTABLISHED_REPLY; + flow_offload_refresh(nf_ft, flow); nf_conntrack_get(&ct->ct_general); nf_ct_set(skb, ct, ctinfo); @@ -1323,6 +1538,17 @@ static void __exit ct_cleanup_module(void) destroy_workqueue(act_ct_wq); } +void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) +{ + enum ip_conntrack_info ctinfo = cookie & NFCT_INFOMASK; + struct nf_conn *ct; + + ct = (struct nf_conn *)(cookie & NFCT_PTRMASK); + nf_conntrack_get(&ct->ct_general); + nf_ct_set(skb, ct, ctinfo); +} +EXPORT_SYMBOL_GPL(tcf_ct_flow_table_restore_skb); + module_init(ct_init_module); module_exit(ct_cleanup_module); MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>"); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e91448640a4f..91a5de0bf628 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -22,6 +22,7 @@ #include <linux/idr.h> #include <linux/rhashtable.h> #include <linux/jhash.h> +#include <linux/rculist.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> @@ -354,7 +355,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (!chain) return NULL; - list_add_tail(&chain->list, &block->chain_list); + list_add_tail_rcu(&chain->list, &block->chain_list); mutex_init(&chain->filter_chain_lock); chain->block = block; chain->index = chain_index; @@ -394,7 +395,7 @@ static bool tcf_chain_detach(struct tcf_chain *chain) ASSERT_BLOCK_LOCKED(block); - list_del(&chain->list); + list_del_rcu(&chain->list); if (!chain->index) block->chain0.chain = NULL; @@ -453,6 +454,20 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, return NULL; } +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block, + u32 chain_index) +{ + struct tcf_chain *chain; + + list_for_each_entry_rcu(chain, &block->chain_list, list) { + if (chain->index == chain_index) + return chain; + } + return NULL; +} +#endif + static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, u32 seq, u16 flags, int event, bool unicast); @@ -1559,12 +1574,15 @@ static int tcf_block_setup(struct tcf_block *block, * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. */ -int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode) +static inline int __tcf_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + const struct tcf_proto *orig_tp, + struct tcf_result *res, + bool compat_mode, + u32 *last_executed_chain) { #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 4; - const struct tcf_proto *orig_tp = tp; const struct tcf_proto *first_tp; int limit = 0; @@ -1582,21 +1600,11 @@ reclassify: #ifdef CONFIG_NET_CLS_ACT if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) { first_tp = orig_tp; + *last_executed_chain = first_tp->chain->index; goto reset; } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { first_tp = res->goto_tp; - -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - { - struct tc_skb_ext *ext; - - ext = skb_ext_add(skb, TC_SKB_EXT); - if (WARN_ON_ONCE(!ext)) - return TC_ACT_SHOT; - - ext->chain = err & TC_ACT_EXT_VAL_MASK; - } -#endif + *last_executed_chain = err & TC_ACT_EXT_VAL_MASK; goto reset; } #endif @@ -1619,8 +1627,64 @@ reset: goto reclassify; #endif } + +int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + u32 last_executed_chain = 0; + + return __tcf_classify(skb, tp, tp, res, compat_mode, + &last_executed_chain); +} EXPORT_SYMBOL(tcf_classify); +int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, + const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 last_executed_chain = 0; + + return __tcf_classify(skb, tp, tp, res, compat_mode, + &last_executed_chain); +#else + u32 last_executed_chain = tp ? tp->chain->index : 0; + const struct tcf_proto *orig_tp = tp; + struct tc_skb_ext *ext; + int ret; + + ext = skb_ext_find(skb, TC_SKB_EXT); + + if (ext && ext->chain) { + struct tcf_chain *fchain; + + fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain); + if (!fchain) + return TC_ACT_SHOT; + + /* Consume, so cloned/redirect skbs won't inherit ext */ + skb_ext_del(skb, TC_SKB_EXT); + + tp = rcu_dereference_bh(fchain->filter_chain); + } + + ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, + &last_executed_chain); + + /* If we missed on some chain */ + if (ret == TC_ACT_UNSPEC && last_executed_chain) { + ext = skb_ext_add(skb, TC_SKB_EXT); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + ext->chain = last_executed_chain; + } + + return ret; +#endif +} +EXPORT_SYMBOL(tcf_classify_ingress); + struct tcf_chain_info { struct tcf_proto __rcu **pprev; struct tcf_proto __rcu *next; @@ -3464,9 +3528,9 @@ int tc_setup_flow_action(struct flow_action *flow_action, struct tc_action *act; int i, j, k, err = 0; - BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_ANY != FLOW_ACTION_HW_STATS_TYPE_ANY); - BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_IMMEDIATE != FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE); - BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_DELAYED != FLOW_ACTION_HW_STATS_TYPE_DELAYED); + BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY); + BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE); + BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED); if (!exts) return 0; @@ -3481,7 +3545,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, if (err) goto err_out_locked; - entry->hw_stats_type = act->hw_stats_type; + entry->hw_stats = act->hw_stats; if (is_tcf_gact_ok(act)) { entry->id = FLOW_ACTION_ACCEPT; @@ -3549,6 +3613,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mangle.mask = tcf_pedit_mask(act, k); entry->mangle.val = tcf_pedit_val(act, k); entry->mangle.offset = tcf_pedit_offset(act, k); + entry->hw_stats = act->hw_stats; entry = &flow_action->entries[++j]; } } else if (is_tcf_csum(act)) { @@ -3572,6 +3637,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); entry->ct.zone = tcf_ct_zone(act); + entry->ct.flow_table = tcf_ct_ft(act); } else if (is_tcf_mpls(act)) { switch (tcf_mpls_action(act)) { case TCA_MPLS_ACT_PUSH: @@ -3599,6 +3665,9 @@ int tc_setup_flow_action(struct flow_action *flow_action, } else if (is_tcf_skbedit_ptype(act)) { entry->id = FLOW_ACTION_PTYPE; entry->ptype = tcf_skbedit_ptype(act); + } else if (is_tcf_skbedit_priority(act)) { + entry->id = FLOW_ACTION_PRIORITY; + entry->priority = tcf_skbedit_priority(act); } else { err = -EOPNOTSUPP; goto err_out_locked; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 50794125bf02..0d99df1e764d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -618,21 +618,28 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_watchdog_init); -void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires) +void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, + u64 delta_ns) { if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc_root_sleeping(wd->qdisc)->state)) return; - if (wd->last_expires == expires) - return; + if (hrtimer_is_queued(&wd->timer)) { + /* If timer is already set in [expires, expires + delta_ns], + * do not reprogram it. + */ + if (wd->last_expires - expires <= delta_ns) + return; + } wd->last_expires = expires; - hrtimer_start(&wd->timer, - ns_to_ktime(expires), - HRTIMER_MODE_ABS_PINNED); + hrtimer_start_range_ns(&wd->timer, + ns_to_ktime(expires), + delta_ns, + HRTIMER_MODE_ABS_PINNED); } -EXPORT_SYMBOL(qdisc_watchdog_schedule_ns); +EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a5a295477ecc..4c060134c736 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -121,6 +121,8 @@ struct fq_sched_data { u64 stat_flows_plimit; u64 stat_pkts_too_long; u64 stat_allocation_errors; + + u32 timer_slack; /* hrtimer slack in ns */ struct qdisc_watchdog watchdog; }; @@ -504,8 +506,9 @@ begin: head = &q->old_flows; if (!head->first) { if (q->time_next_delayed_flow != ~0ULL) - qdisc_watchdog_schedule_ns(&q->watchdog, - q->time_next_delayed_flow); + qdisc_watchdog_schedule_range_ns(&q->watchdog, + q->time_next_delayed_flow, + q->timer_slack); return NULL; } } @@ -735,6 +738,8 @@ static int fq_resize(struct Qdisc *sch, u32 log) } static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { + [TCA_FQ_UNSPEC] = { .strict_start_type = TCA_FQ_TIMER_SLACK }, + [TCA_FQ_PLIMIT] = { .type = NLA_U32 }, [TCA_FQ_FLOW_PLIMIT] = { .type = NLA_U32 }, [TCA_FQ_QUANTUM] = { .type = NLA_U32 }, @@ -744,8 +749,10 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, + [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 }, [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 }, + [TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 }, }; static int fq_change(struct Qdisc *sch, struct nlattr *opt, @@ -832,6 +839,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, q->ce_threshold = (u64)NSEC_PER_USEC * nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]); + if (tb[TCA_FQ_TIMER_SLACK]) + q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]); + if (!err) { sch_tree_unlock(sch); err = fq_resize(sch, fq_log); @@ -883,6 +893,8 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, q->orphan_mask = 1024 - 1; q->low_rate_threshold = 550000 / 8; + q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */ + /* Default ce_threshold of 4294 seconds */ q->ce_threshold = (u64)NSEC_PER_USEC * ~0U; @@ -923,7 +935,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD, q->low_rate_threshold) || nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) || - nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) + nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) || + nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack)) goto nla_put_failure; return nla_nest_end(skb, opts); @@ -946,7 +959,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.flows_plimit = q->stat_flows_plimit; st.pkts_too_long = q->stat_pkts_too_long; st.allocation_errors = q->stat_allocation_errors; - st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns(); + st.time_next_delayed_flow = q->time_next_delayed_flow + q->timer_slack - + ktime_get_ns(); st.flows = q->flows; st.inactive_flows = q->inactive_flows; st.throttled_flows = q->throttled_flows; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 36a40ebcf0ee..2efd5b61acef 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1268,27 +1268,6 @@ int dev_qdisc_change_tx_queue_len(struct net_device *dev) return ret; } -void dev_qdisc_set_real_num_tx_queues(struct net_device *dev) -{ -#ifdef CONFIG_NET_SCHED - struct Qdisc *sch = dev->qdisc; - unsigned int ntx; - - if (!sch) - return; - - ASSERT_RTNL(); - - for (ntx = 0; ntx < dev->real_num_tx_queues; ntx++) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); - struct Qdisc *qdisc = dev_queue->qdisc; - - if (qdisc && !qdisc_hashed(qdisc)) - qdisc_hash_add(qdisc, false); - } -#endif -} - static void dev_init_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc) @@ -1412,6 +1391,14 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, } EXPORT_SYMBOL(mini_qdisc_pair_swap); +void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, + struct tcf_block *block) +{ + miniqp->miniq1.block = block; + miniqp->miniq2.block = block; +} +EXPORT_SYMBOL(mini_qdisc_pair_block_init); + void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq) { diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index bf56aa519797..84838128b9c5 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -78,6 +78,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + int err; net_inc_ingress_queue(); @@ -87,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, q->block_info.chain_head_change = clsact_chain_head_change; q->block_info.chain_head_change_priv = &q->miniqp; - return tcf_block_get_ext(&q->block, sch, &q->block_info, extack); + err = tcf_block_get_ext(&q->block, sch, &q->block_info, extack); + if (err) + return err; + + mini_qdisc_pair_block_init(&q->miniqp, q->block); + + return 0; } static void ingress_destroy(struct Qdisc *sch) @@ -226,6 +233,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, if (err) return err; + mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block); + mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress); q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS; diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 1695421333e3..3ef0a4f7399b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -35,7 +35,11 @@ struct red_sched_data { u32 limit; /* HARD maximal queue length */ + unsigned char flags; + /* Non-flags in tc_red_qopt.flags. */ + unsigned char userbits; + struct timer_list adapt_timer; struct Qdisc *sch; struct red_parms parms; @@ -44,6 +48,8 @@ struct red_sched_data { struct Qdisc *qdisc; }; +static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP; + static inline int red_use_ecn(struct red_sched_data *q) { return q->flags & TC_RED_ECN; @@ -54,6 +60,11 @@ static inline int red_use_harddrop(struct red_sched_data *q) return q->flags & TC_RED_HARDDROP; } +static int red_use_nodrop(struct red_sched_data *q) +{ + return q->flags & TC_RED_NODROP; +} + static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -74,23 +85,36 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, case RED_PROB_MARK: qdisc_qstats_overlimit(sch); - if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { + if (!red_use_ecn(q)) { q->stats.prob_drop++; goto congestion_drop; } - q->stats.prob_mark++; + if (INET_ECN_set_ce(skb)) { + q->stats.prob_mark++; + } else if (!red_use_nodrop(q)) { + q->stats.prob_drop++; + goto congestion_drop; + } + + /* Non-ECT packet in ECN nodrop mode: queue it. */ break; case RED_HARD_MARK: qdisc_qstats_overlimit(sch); - if (red_use_harddrop(q) || !red_use_ecn(q) || - !INET_ECN_set_ce(skb)) { + if (red_use_harddrop(q) || !red_use_ecn(q)) { q->stats.forced_drop++; goto congestion_drop; } - q->stats.forced_mark++; + if (INET_ECN_set_ce(skb)) { + q->stats.forced_mark++; + } else if (!red_use_nodrop(q)) { + q->stats.forced_drop++; + goto congestion_drop; + } + + /* Non-ECT packet in ECN nodrop mode: queue it. */ break; } @@ -165,6 +189,7 @@ static int red_offload(struct Qdisc *sch, bool enable) opt.set.limit = q->limit; opt.set.is_ecn = red_use_ecn(q); opt.set.is_harddrop = red_use_harddrop(q); + opt.set.is_nodrop = red_use_nodrop(q); opt.set.qstats = &sch->qstats; } else { opt.command = TC_RED_DESTROY; @@ -183,9 +208,12 @@ static void red_destroy(struct Qdisc *sch) } static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { + [TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS }, [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) }, [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, [TCA_RED_MAX_P] = { .type = NLA_U32 }, + [TCA_RED_FLAGS] = { .type = NLA_BITFIELD32, + .validation_data = &red_supported_flags }, }; static int red_change(struct Qdisc *sch, struct nlattr *opt, @@ -194,7 +222,10 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, struct Qdisc *old_child = NULL, *child = NULL; struct red_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_RED_MAX + 1]; + struct nla_bitfield32 flags_bf; struct tc_red_qopt *ctl; + unsigned char userbits; + unsigned char flags; int err; u32 max_P; @@ -216,6 +247,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) return -EINVAL; + err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS, + tb[TCA_RED_FLAGS], red_supported_flags, + &flags_bf, &userbits, extack); + if (err) + return err; + if (ctl->limit > 0) { child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit, extack); @@ -227,7 +264,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, } sch_tree_lock(sch); - q->flags = ctl->flags; + + flags = (q->flags & ~flags_bf.selector) | flags_bf.value; + err = red_validate_flags(flags, extack); + if (err) + goto unlock_out; + + q->flags = flags; + q->userbits = userbits; q->limit = ctl->limit; if (child) { qdisc_tree_flush_backlog(q->qdisc); @@ -256,6 +300,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (old_child) qdisc_put(old_child); return 0; + +unlock_out: + sch_tree_unlock(sch); + if (child) + qdisc_put(child); + return err; } static inline void red_adaptative_timer(struct timer_list *t) @@ -299,10 +349,15 @@ static int red_dump_offload_stats(struct Qdisc *sch) static int red_dump(struct Qdisc *sch, struct sk_buff *skb) { struct red_sched_data *q = qdisc_priv(sch); + struct nla_bitfield32 flags_bf = { + .selector = red_supported_flags, + .value = q->flags, + }; struct nlattr *opts = NULL; struct tc_red_qopt opt = { .limit = q->limit, - .flags = q->flags, + .flags = (q->flags & TC_RED_HISTORIC_FLAGS) | + q->userbits, .qth_min = q->parms.qth_min >> q->parms.Wlog, .qth_max = q->parms.qth_max >> q->parms.Wlog, .Wlog = q->parms.Wlog, @@ -319,7 +374,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) || - nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P)) + nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) || + nla_put(skb, TCA_RED_FLAGS, sizeof(flags_bf), &flags_bf)) goto nla_put_failure; return nla_nest_end(skb, opts); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 660fc45ee40f..b1eb12d33b9a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -564,8 +564,10 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch) prio = skb->priority; tc = netdev_get_prio_tc_map(dev, prio); - if (!(gate_mask & BIT(tc))) + if (!(gate_mask & BIT(tc))) { + skb = NULL; continue; + } len = qdisc_pkt_len(skb); guard = ktime_add_ns(taprio_get_time(q), @@ -575,13 +577,17 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch) * guard band ... */ if (gate_mask != TAPRIO_ALL_GATES_OPEN && - ktime_after(guard, entry->close_time)) + ktime_after(guard, entry->close_time)) { + skb = NULL; continue; + } /* ... and no budget. */ if (gate_mask != TAPRIO_ALL_GATES_OPEN && - atomic_sub_return(len, &entry->budget) < 0) + atomic_sub_return(len, &entry->budget) < 0) { + skb = NULL; continue; + } skb = child->ops->dequeue(child); if (unlikely(!skb)) @@ -768,6 +774,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, + [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, }; static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 69743a6aaf6f..493fc01e5d2b 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -237,15 +237,11 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64; } diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 9239cf881f21..04b6fefb8bce 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -600,6 +600,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) smc_smcr_terminate_all(smcibdev); smc_ib_cleanup_per_ibdev(smcibdev); ib_unregister_event_handler(&smcibdev->event_handler); + cancel_work_sync(&smcibdev->port_event_work); kfree(smcibdev); } diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 58708b4c7719..6dce2abf436e 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -322,9 +322,13 @@ static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head) void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *self; struct tipc_peer *peer, *prev, *head; + if (!mon) + return; + + self = get_self(net, bearer_id); write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer) @@ -407,11 +411,15 @@ exit: void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *self; struct tipc_peer *peer, *head; struct tipc_mon_domain *dom; int applied; + if (!mon) + return; + + self = get_self(net, bearer_id); write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer) { diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 0d515d20b056..4d0e0bdd997b 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -736,9 +736,6 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg_set_destport(msg, dport); *err = TIPC_OK; - if (!skb_cloned(skb)) - return true; - return true; } diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 7c35094c20b8..bb9862410e68 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -116,6 +116,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 }, [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 }, [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_MTU] = { .type = NLA_U32 }, [TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 }, [TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 } }; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 1c5574e2e058..a562ebaaa33c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -366,7 +366,7 @@ static int tls_do_allocation(struct sock *sk, if (!offload_ctx->open_record) { if (unlikely(!skb_page_frag_refill(prepend_size, pfrag, sk->sk_allocation))) { - sk->sk_prot->enter_memory_pressure(sk); + READ_ONCE(sk->sk_prot)->enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); return -ENOMEM; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 82225bcc1117..156efce50dbd 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -63,13 +63,14 @@ static DEFINE_MUTEX(tcpv4_prot_mutex); static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG]; static struct proto_ops tls_sw_proto_ops; static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], - struct proto *base); + const struct proto *base); void update_sk_prot(struct sock *sk, struct tls_context *ctx) { int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; - sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf]; + WRITE_ONCE(sk->sk_prot, + &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf]); } int wait_on_pending_writer(struct sock *sk, long *timeo) @@ -312,7 +313,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) write_lock_bh(&sk->sk_callback_lock); if (free_ctx) rcu_assign_pointer(icsk->icsk_ulp_data, NULL); - sk->sk_prot = ctx->sk_proto; + WRITE_ONCE(sk->sk_prot, ctx->sk_proto); if (sk->sk_write_space == tls_write_space) sk->sk_write_space = ctx->sk_write_space; write_unlock_bh(&sk->sk_callback_lock); @@ -621,38 +622,39 @@ struct tls_context *tls_ctx_create(struct sock *sk) mutex_init(&ctx->tx_lock); rcu_assign_pointer(icsk->icsk_ulp_data, ctx); - ctx->sk_proto = sk->sk_prot; + ctx->sk_proto = READ_ONCE(sk->sk_prot); return ctx; } static void tls_build_proto(struct sock *sk) { int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; + const struct proto *prot = READ_ONCE(sk->sk_prot); /* Build IPv6 TLS whenever the address of tcpv6 _prot changes */ if (ip_ver == TLSV6 && - unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { + unlikely(prot != smp_load_acquire(&saved_tcpv6_prot))) { mutex_lock(&tcpv6_prot_mutex); - if (likely(sk->sk_prot != saved_tcpv6_prot)) { - build_protos(tls_prots[TLSV6], sk->sk_prot); - smp_store_release(&saved_tcpv6_prot, sk->sk_prot); + if (likely(prot != saved_tcpv6_prot)) { + build_protos(tls_prots[TLSV6], prot); + smp_store_release(&saved_tcpv6_prot, prot); } mutex_unlock(&tcpv6_prot_mutex); } if (ip_ver == TLSV4 && - unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv4_prot))) { + unlikely(prot != smp_load_acquire(&saved_tcpv4_prot))) { mutex_lock(&tcpv4_prot_mutex); - if (likely(sk->sk_prot != saved_tcpv4_prot)) { - build_protos(tls_prots[TLSV4], sk->sk_prot); - smp_store_release(&saved_tcpv4_prot, sk->sk_prot); + if (likely(prot != saved_tcpv4_prot)) { + build_protos(tls_prots[TLSV4], prot); + smp_store_release(&saved_tcpv4_prot, prot); } mutex_unlock(&tcpv4_prot_mutex); } } static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], - struct proto *base) + const struct proto *base) { prot[TLS_BASE][TLS_BASE] = *base; prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt; diff --git a/net/wireless/core.c b/net/wireless/core.c index 3e25229a059d..341402b4f178 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -693,8 +693,14 @@ int wiphy_register(struct wiphy *wiphy) ~(BIT(NL80211_PREAMBLE_LEGACY) | BIT(NL80211_PREAMBLE_HT) | BIT(NL80211_PREAMBLE_VHT) | + BIT(NL80211_PREAMBLE_HE) | BIT(NL80211_PREAMBLE_DMG)))) return -EINVAL; + if (WARN_ON((wiphy->pmsr_capa->ftm.trigger_based || + wiphy->pmsr_capa->ftm.non_trigger_based) && + !(wiphy->pmsr_capa->ftm.preambles & + BIT(NL80211_PREAMBLE_HE)))) + return -EINVAL; if (WARN_ON(wiphy->pmsr_capa->ftm.bandwidths & ~(BIT(NL80211_CHAN_WIDTH_20_NOHT) | BIT(NL80211_CHAN_WIDTH_20) | diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 15000275b32d..ad87e9db9a91 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #include <linux/if.h> @@ -276,6 +276,8 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = { [NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG }, + [NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED] = { .type = NLA_FLAG }, + [NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED] = { .type = NLA_FLAG }, }; static const struct nla_policy @@ -493,6 +495,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_PLINK_STATE] = NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_STATES - 1), + [NL80211_ATTR_MEASUREMENT_DURATION] = { .type = NLA_U16 }, + [NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY] = { .type = NLA_FLAG }, [NL80211_ATTR_MESH_PEER_AID] = NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID), [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 }, @@ -554,6 +558,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 }, + [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = { .type = NLA_U16 }, [NL80211_ATTR_PEER_AID] = NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID), [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, @@ -584,6 +590,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_UPS - 1), [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, + [NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = { .type = NLA_EXACT_LEN_WARN, .len = ETH_ALEN @@ -653,6 +660,9 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_HE_BSS_COLOR] = NLA_POLICY_NESTED(he_bss_color_policy), [NL80211_ATTR_TID_CONFIG] = NLA_POLICY_NESTED_ARRAY(nl80211_tid_config_attr_policy), + [NL80211_ATTR_CONTROL_PORT_NO_PREAUTH] = { .type = NLA_FLAG }, + [NL80211_ATTR_PMK_LIFETIME] = NLA_POLICY_MIN(NLA_U32, 1), + [NL80211_ATTR_PMK_REAUTH_THRESHOLD] = NLA_POLICY_RANGE(NLA_U8, 1, 100), }; /* policy for the key attributes */ @@ -1879,6 +1889,12 @@ nl80211_send_pmsr_ftm_capa(const struct cfg80211_pmsr_capabilities *cap, nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST, cap->ftm.max_ftms_per_burst)) return -ENOBUFS; + if (cap->ftm.trigger_based && + nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_TRIGGER_BASED)) + return -ENOBUFS; + if (cap->ftm.non_trigger_based && + nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_NON_TRIGGER_BASED)) + return -ENOBUFS; nla_nest_end(msg, ftm); return 0; @@ -4743,6 +4759,9 @@ static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params) cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len); if (cap && cap[1] >= sizeof(*params->he_cap) + 1) params->he_cap = (void *)(cap + 3); + cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ies, ies_len); + if (cap && cap[1] >= sizeof(*params->he_oper) + 1) + params->he_oper = (void *)(cap + 3); } static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, @@ -6266,11 +6285,22 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) params.mac = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + /* always accept these */ + break; + case NL80211_IFTYPE_ADHOC: + /* conditionally accept */ + if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_DEL_IBSS_STA)) + break; + return -EINVAL; + default: return -EINVAL; + } if (!rdev->ops->del_station) return -EOPNOTSUPP; @@ -9301,6 +9331,9 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, return r; settings->control_port_over_nl80211 = true; + + if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_PREAUTH]) + settings->control_port_no_preauth = true; } if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { @@ -10489,6 +10522,15 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) pmksa.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]); } + if (info->attrs[NL80211_ATTR_PMK_LIFETIME]) + pmksa.pmk_lifetime = + nla_get_u32(info->attrs[NL80211_ATTR_PMK_LIFETIME]); + + if (info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]) + pmksa.pmk_reauth_threshold = + nla_get_u8( + info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]); + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && !(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP && diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index c09fbf09549d..63dc8023447f 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -126,6 +126,38 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, "FTM: civic location request not supported"); } + out->ftm.trigger_based = + !!tb[NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED]; + if (out->ftm.trigger_based && !capa->ftm.trigger_based) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED], + "FTM: trigger based ranging is not supported"); + return -EINVAL; + } + + out->ftm.non_trigger_based = + !!tb[NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED]; + if (out->ftm.non_trigger_based && !capa->ftm.non_trigger_based) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED], + "FTM: trigger based ranging is not supported"); + return -EINVAL; + } + + if (out->ftm.trigger_based && out->ftm.non_trigger_based) { + NL_SET_ERR_MSG(info->extack, + "FTM: can't set both trigger based and non trigger based"); + return -EINVAL; + } + + if ((out->ftm.trigger_based || out->ftm.non_trigger_based) && + out->ftm.preamble != NL80211_PREAMBLE_HE) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE], + "FTM: non EDCA based ranging must use HE preamble"); + return -EINVAL; + } + return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index dbda08ec566e..d560d723b601 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2613,7 +2613,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, xdst->xfrm_genid = xfrm[i]->genid; dst1->obsolete = DST_OBSOLETE_FORCE_CHK; - dst1->flags |= DST_HOST; dst1->lastuse = now; dst1->input = dst_discard; @@ -2899,7 +2898,7 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, dst_copy_metrics(dst1, dst); dst1->obsolete = DST_OBSOLETE_FORCE_CHK; - dst1->flags |= DST_HOST | DST_XFRM_QUEUE; + dst1->flags |= DST_XFRM_QUEUE; dst1->lastuse = jiffies; dst1->input = dst_discard; diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index cebed6fb5bbb..f43d193aff3a 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -400,6 +400,7 @@ class PrinterHelpers(Printer): 'struct bpf_fib_lookup', 'struct bpf_perf_event_data', 'struct bpf_perf_event_value', + 'struct bpf_pidns_info', 'struct bpf_sock', 'struct bpf_sock_addr', 'struct bpf_sock_ops', @@ -435,6 +436,7 @@ class PrinterHelpers(Printer): 'struct bpf_fib_lookup', 'struct bpf_perf_event_data', 'struct bpf_perf_event_value', + 'struct bpf_pidns_info', 'struct bpf_sock', 'struct bpf_sock_addr', 'struct bpf_sock_ops', diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index dd484e92752e..ac569e197bfa 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -63,12 +63,18 @@ vmlinux_link() local lds="${objtree}/${KBUILD_LDS}" local output=${1} local objects + local strip_debug info LD ${output} # skip output file argument shift + # The kallsyms linking does not need debug symbols included. + if [ "$output" != "${output#.tmp_vmlinux.kallsyms}" ] ; then + strip_debug=-Wl,--strip-debug + fi + if [ "${SRCARCH}" != "um" ]; then objects="--whole-archive \ ${KBUILD_VMLINUX_OBJS} \ @@ -79,6 +85,7 @@ vmlinux_link() ${@}" ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \ + ${strip_debug#-Wl,} \ -o ${output} \ -T ${lds} ${objects} else @@ -91,6 +98,7 @@ vmlinux_link() ${@}" ${CC} ${CFLAGS_vmlinux} \ + ${strip_debug} \ -o ${output} \ -Wl,-T,${lds} \ ${objects} \ @@ -106,6 +114,8 @@ gen_btf() { local pahole_ver local bin_arch + local bin_format + local bin_file if ! [ -x "$(command -v ${PAHOLE})" ]; then echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available" @@ -118,8 +128,9 @@ gen_btf() return 1 fi - info "BTF" ${2} vmlinux_link ${1} + + info "BTF" ${2} LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1} # dump .BTF section into raw binary file to link with final vmlinux @@ -127,11 +138,12 @@ gen_btf() cut -d, -f1 | cut -d' ' -f2) bin_format=$(LANG=C ${OBJDUMP} -f ${1} | grep 'file format' | \ awk '{print $4}') + bin_file=.btf.vmlinux.bin ${OBJCOPY} --change-section-address .BTF=0 \ --set-section-flags .BTF=alloc -O binary \ - --only-section=.BTF ${1} .btf.vmlinux.bin + --only-section=.BTF ${1} $bin_file ${OBJCOPY} -I binary -O ${bin_format} -B ${bin_arch} \ - --rename-section .data=.BTF .btf.vmlinux.bin ${2} + --rename-section .data=.BTF $bin_file ${2} } # Create ${2} .o file with all symbols from the ${1} object file @@ -166,8 +178,8 @@ kallsyms() kallsyms_step() { kallsymso_prev=${kallsymso} - kallsymso=.tmp_kallsyms${1}.o - kallsyms_vmlinux=.tmp_vmlinux${1} + kallsyms_vmlinux=.tmp_vmlinux.kallsyms${1} + kallsymso=${kallsyms_vmlinux}.o vmlinux_link ${kallsyms_vmlinux} "${kallsymso_prev}" ${btf_vmlinux_bin_o} kallsyms ${kallsyms_vmlinux} ${kallsymso} @@ -190,7 +202,6 @@ cleanup() { rm -f .btf.* rm -f .tmp_System.map - rm -f .tmp_kallsyms* rm -f .tmp_vmlinux* rm -f System.map rm -f vmlinux @@ -257,9 +268,8 @@ tr '\0' '\n' < modules.builtin.modinfo | sed -n 's/^[[:alnum:]:_]*\.file=//p' | btf_vmlinux_bin_o="" if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then - if gen_btf .tmp_vmlinux.btf .btf.vmlinux.bin.o ; then - btf_vmlinux_bin_o=.btf.vmlinux.bin.o - else + btf_vmlinux_bin_o=.btf.vmlinux.bin.o + if ! gen_btf .tmp_vmlinux.btf $btf_vmlinux_bin_o ; then echo >&2 "Failed to generate BTF for vmlinux" echo >&2 "Try to disable CONFIG_DEBUG_INFO_BTF" exit 1 diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl index 255cef1b098d..255cef1b098d 100644..100755 --- a/scripts/parse-maintainers.pl +++ b/scripts/parse-maintainers.pl diff --git a/sound/mips/sgio2audio.c b/sound/mips/sgio2audio.c index 9f60a5037f8b..5bf1ea150f26 100644 --- a/sound/mips/sgio2audio.c +++ b/sound/mips/sgio2audio.c @@ -649,8 +649,6 @@ snd_sgio2audio_pcm_pointer(struct snd_pcm_substream *substream) static const struct snd_pcm_ops snd_sgio2audio_playback1_ops = { .open = snd_sgio2audio_playback1_open, .close = snd_sgio2audio_pcm_close, - .hw_params = snd_sgio2audio_pcm_hw_params, - .hw_free = snd_sgio2audio_pcm_hw_free, .prepare = snd_sgio2audio_pcm_prepare, .trigger = snd_sgio2audio_pcm_trigger, .pointer = snd_sgio2audio_pcm_pointer, @@ -659,8 +657,6 @@ static const struct snd_pcm_ops snd_sgio2audio_playback1_ops = { static const struct snd_pcm_ops snd_sgio2audio_playback2_ops = { .open = snd_sgio2audio_playback2_open, .close = snd_sgio2audio_pcm_close, - .hw_params = snd_sgio2audio_pcm_hw_params, - .hw_free = snd_sgio2audio_pcm_hw_free, .prepare = snd_sgio2audio_pcm_prepare, .trigger = snd_sgio2audio_pcm_trigger, .pointer = snd_sgio2audio_pcm_pointer, @@ -669,8 +665,6 @@ static const struct snd_pcm_ops snd_sgio2audio_playback2_ops = { static const struct snd_pcm_ops snd_sgio2audio_capture_ops = { .open = snd_sgio2audio_capture_open, .close = snd_sgio2audio_pcm_close, - .hw_params = snd_sgio2audio_pcm_hw_params, - .hw_free = snd_sgio2audio_pcm_hw_free, .prepare = snd_sgio2audio_pcm_prepare, .trigger = snd_sgio2audio_pcm_trigger, .pointer = snd_sgio2audio_pcm_pointer, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 477589e7ec1d..0ac06ff1a17c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2447,6 +2447,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), + SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950), @@ -5920,7 +5921,8 @@ enum { ALC289_FIXUP_DUAL_SPK, ALC294_FIXUP_SPK2_TO_DAC1, ALC294_FIXUP_ASUS_DUAL_SPK, - + ALC285_FIXUP_THINKPAD_HEADSET_JACK, + ALC294_FIXUP_ASUS_HPE, }; static const struct hda_fixup alc269_fixups[] = { @@ -6684,6 +6686,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC285_FIXUP_SPEAKER2_TO_DAC1] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_speaker2_to_dac1, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI }, [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = { .type = HDA_FIXUP_PINS, @@ -7040,7 +7044,23 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_SPK2_TO_DAC1 }, - + [ALC285_FIXUP_THINKPAD_HEADSET_JACK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_headset_jack, + .chained = true, + .chain_id = ALC285_FIXUP_SPEAKER2_TO_DAC1 + }, + [ALC294_FIXUP_ASUS_HPE] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Set EAPD high */ + { 0x20, AC_VERB_SET_COEF_INDEX, 0x0f }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x7774 }, + { } + }, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7115,6 +7135,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x097e, "Dell Precision", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x097d, "Dell Precision", ALC289_FIXUP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x098d, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -7204,6 +7226,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), @@ -7274,8 +7297,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), - SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Yoga 7th", ALC285_FIXUP_SPEAKER2_TO_DAC1), - SND_PCI_QUIRK(0x17aa, 0x2293, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_SPEAKER2_TO_DAC1), + SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Yoga 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), + SND_PCI_QUIRK(0x17aa, 0x2293, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 7e90f5d83097..ea912439e446 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1406,7 +1406,7 @@ config SND_SOC_WM8737 depends on SND_SOC_I2C_AND_SPI config SND_SOC_WM8741 - tristate "Wolfson Microelectronics WM8737 DAC" + tristate "Wolfson Microelectronics WM8741 DAC" depends on SND_SOC_I2C_AND_SPI config SND_SOC_WM8750 diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c index 861210f6bf4f..4cbef9affffd 100644 --- a/sound/soc/codecs/pcm512x.c +++ b/sound/soc/codecs/pcm512x.c @@ -1564,13 +1564,15 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap) } pcm512x->sclk = devm_clk_get(dev, NULL); - if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto err; + } if (!IS_ERR(pcm512x->sclk)) { ret = clk_prepare_enable(pcm512x->sclk); if (ret != 0) { dev_err(dev, "Failed to enable SCLK: %d\n", ret); - return ret; + goto err; } } diff --git a/sound/soc/codecs/rt1015.c b/sound/soc/codecs/rt1015.c index 6d490e2dbc25..66eb55b4ffd4 100644 --- a/sound/soc/codecs/rt1015.c +++ b/sound/soc/codecs/rt1015.c @@ -664,7 +664,7 @@ static int rt1015_hw_params(struct snd_pcm_substream *substream, snd_soc_component_update_bits(component, RT1015_TDM_MASTER, RT1015_I2S_DL_MASK, val_len); snd_soc_component_update_bits(component, RT1015_CLK2, - RT1015_FS_PD_MASK, pre_div); + RT1015_FS_PD_MASK, pre_div << RT1015_FS_PD_SFT); return 0; } @@ -857,6 +857,7 @@ struct snd_soc_dai_driver rt1015_dai[] = { .rates = RT1015_STEREO_RATES, .formats = RT1015_FORMATS, }, + .ops = &rt1015_aif_dai_ops, } }; diff --git a/sound/soc/codecs/tas2562.c b/sound/soc/codecs/tas2562.c index 729acd874c48..be52886a5edb 100644 --- a/sound/soc/codecs/tas2562.c +++ b/sound/soc/codecs/tas2562.c @@ -215,7 +215,8 @@ static int tas2562_set_bitwidth(struct tas2562_data *tas2562, int bitwidth) break; default: - dev_info(tas2562->dev, "Not supported params format\n"); + dev_info(tas2562->dev, "Unsupported bitwidth format\n"); + return -EINVAL; } ret = snd_soc_component_update_bits(tas2562->component, @@ -251,7 +252,7 @@ static int tas2562_hw_params(struct snd_pcm_substream *substream, ret = tas2562_set_samplerate(tas2562, params_rate(params)); if (ret) - dev_err(tas2562->dev, "set bitwidth failed, %d\n", ret); + dev_err(tas2562->dev, "set sample rate failed, %d\n", ret); return ret; } diff --git a/sound/soc/intel/skylake/skl-debug.c b/sound/soc/intel/skylake/skl-debug.c index 3466675f2678..a15aa2ffa681 100644 --- a/sound/soc/intel/skylake/skl-debug.c +++ b/sound/soc/intel/skylake/skl-debug.c @@ -34,8 +34,8 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, int i; ssize_t ret = 0; - for (i = 0; i < max_pin; i++) - ret += snprintf(buf + size, MOD_BUF - size, + for (i = 0; i < max_pin; i++) { + ret += scnprintf(buf + size, MOD_BUF - size, "%s %d\n\tModule %d\n\tInstance %d\n\t" "In-used %s\n\tType %s\n" "\tState %d\n\tIndex %d\n", @@ -45,13 +45,15 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, m_pin[i].in_use ? "Used" : "Unused", m_pin[i].is_dynamic ? "Dynamic" : "Static", m_pin[i].pin_state, i); + size += ret; + } return ret; } static ssize_t skl_print_fmt(struct skl_module_fmt *fmt, char *buf, ssize_t size, bool direction) { - return snprintf(buf + size, MOD_BUF - size, + return scnprintf(buf + size, MOD_BUF - size, "%s\n\tCh %d\n\tFreq %d\n\tBit depth %d\n\t" "Valid bit depth %d\n\tCh config %#x\n\tInterleaving %d\n\t" "Sample Type %d\n\tCh Map %#x\n", @@ -75,16 +77,16 @@ static ssize_t module_read(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; - ret = snprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" + ret = scnprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" "\tInstance id %d\n\tPvt_id %d\n", mconfig->guid, mconfig->id.module_id, mconfig->id.instance_id, mconfig->id.pvt_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Resources:\n\tCPC %#x\n\tIBS %#x\n\tOBS %#x\t\n", res->cpc, res->ibs, res->obs); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Module data:\n\tCore %d\n\tIn queue %d\n\t" "Out queue %d\n\tType %s\n", mconfig->core_id, mconfig->max_in_queue, @@ -94,38 +96,38 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_fmt(mconfig->in_fmt, buf, ret, true); ret += skl_print_fmt(mconfig->out_fmt, buf, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Fixup:\n\tParams %#x\n\tConverter %#x\n", mconfig->params_fixup, mconfig->converter); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Module Gateway:\n\tType %#x\n\tVbus %#x\n\tHW conn %#x\n\tSlot %#x\n", mconfig->dev_type, mconfig->vbus_id, mconfig->hw_conn_type, mconfig->time_slot); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Pipeline:\n\tID %d\n\tPriority %d\n\tConn Type %d\n\t" "Pages %#x\n", mconfig->pipe->ppl_id, mconfig->pipe->pipe_priority, mconfig->pipe->conn_type, mconfig->pipe->memory_pages); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tParams:\n\t\tHost DMA %d\n\t\tLink DMA %d\n", mconfig->pipe->p_params->host_dma_id, mconfig->pipe->p_params->link_dma_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tPCM params:\n\t\tCh %d\n\t\tFreq %d\n\t\tFormat %d\n", mconfig->pipe->p_params->ch, mconfig->pipe->p_params->s_freq, mconfig->pipe->p_params->s_fmt); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tLink %#x\n\tStream %#x\n", mconfig->pipe->p_params->linktype, mconfig->pipe->p_params->stream); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tState %d\n\tPassthru %s\n", mconfig->pipe->state, mconfig->pipe->passthru ? "true" : "false"); @@ -135,7 +137,7 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_pins(mconfig->m_out_pin, buf, mconfig->max_out_queue, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Other:\n\tDomain %d\n\tHomogeneous Input %s\n\t" "Homogeneous Output %s\n\tIn Queue Mask %d\n\t" "Out Queue Mask %d\n\tDMA ID %d\n\tMem Pages %d\n\t" @@ -191,7 +193,7 @@ static ssize_t fw_softreg_read(struct file *file, char __user *user_buf, __ioread32_copy(d->fw_read_buff, fw_reg_addr, w0_stat_sz >> 2); for (offset = 0; offset < FW_REG_SIZE; offset += 16) { - ret += snprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); + ret += scnprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); hex_dump_to_buffer(d->fw_read_buff + offset, 16, 16, 4, tmp + ret, FW_REG_BUF - ret, 0); ret += strlen(tmp + ret); diff --git a/sound/soc/intel/skylake/skl-ssp-clk.c b/sound/soc/intel/skylake/skl-ssp-clk.c index 1c0e5226cb5b..bd43885f3805 100644 --- a/sound/soc/intel/skylake/skl-ssp-clk.c +++ b/sound/soc/intel/skylake/skl-ssp-clk.c @@ -384,9 +384,11 @@ static int skl_clk_dev_probe(struct platform_device *pdev) &clks[i], clk_pdata, i); if (IS_ERR(data->clk[data->avail_clk_cnt])) { - ret = PTR_ERR(data->clk[data->avail_clk_cnt++]); + ret = PTR_ERR(data->clk[data->avail_clk_cnt]); goto err_unreg_skl_clk; } + + data->avail_clk_cnt++; } platform_set_drvdata(pdev, data); diff --git a/sound/soc/meson/g12a-tohdmitx.c b/sound/soc/meson/g12a-tohdmitx.c index 9cfbd343a00c..8a0db28a6a40 100644 --- a/sound/soc/meson/g12a-tohdmitx.c +++ b/sound/soc/meson/g12a-tohdmitx.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <sound/pcm_params.h> #include <linux/regmap.h> +#include <linux/reset.h> #include <sound/soc.h> #include <sound/soc-dai.h> @@ -378,6 +379,11 @@ static int g12a_tohdmitx_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; void __iomem *regs; struct regmap *map; + int ret; + + ret = device_reset(dev); + if (ret) + return ret; regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(regs)) diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index 14e175cdeeb8..785a0385cc7f 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -451,7 +451,7 @@ int snd_soc_pcm_component_sync_stop(struct snd_pcm_substream *substream) int i, ret; for_each_rtd_components(rtd, i, component) { - if (component->driver->ioctl) { + if (component->driver->sync_stop) { ret = component->driver->sync_stop(component, substream); if (ret < 0) diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index 223cd045719e..392a1c5b15d3 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -299,7 +299,7 @@ static int soc_compr_free_fe(struct snd_compr_stream *cstream) for_each_dpcm_be(fe, stream, dpcm) dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE; - snd_soc_dapm_stream_stop(fe, stream); + dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP); fe->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE; fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 9b130561d562..9fb54e6fe254 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -4772,7 +4772,7 @@ static void soc_dapm_shutdown_dapm(struct snd_soc_dapm_context *dapm) continue; if (w->power) { dapm_seq_insert(w, &down_list, false); - w->power = 0; + w->new_power = 0; powerdown = 1; } } diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index ff1b7c7078e5..2c59b3688ca0 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2006,7 +2006,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream) soc_pcm_close(substream); /* run the stream event for each BE */ - snd_soc_dapm_stream_stop(fe, stream); + dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP); fe->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE; dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); @@ -3171,16 +3171,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, unsigned long flags; /* FE state */ - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "[%s - %s]\n", fe->dai_link->name, stream ? "Capture" : "Playback"); - offset += snprintf(buf + offset, size - offset, "State: %s\n", + offset += scnprintf(buf + offset, size - offset, "State: %s\n", dpcm_state_string(fe->dpcm[stream].state)); if ((fe->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (fe->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), @@ -3188,10 +3188,10 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, params_rate(params)); /* BEs state */ - offset += snprintf(buf + offset, size - offset, "Backends:\n"); + offset += scnprintf(buf + offset, size - offset, "Backends:\n"); if (list_empty(&fe->dpcm[stream].be_clients)) { - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " No active DSP links\n"); goto out; } @@ -3201,16 +3201,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, struct snd_soc_pcm_runtime *be = dpcm->be; params = &dpcm->hw_params; - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "- %s\n", be->dai_link->name); - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " State: %s\n", dpcm_state_string(be->dpcm[stream].state)); if ((be->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index d2ee6ad20e83..575da6aba807 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -2377,8 +2377,11 @@ static int soc_tplg_link_elems_load(struct soc_tplg *tplg, } ret = soc_tplg_link_config(tplg, _link); - if (ret < 0) + if (ret < 0) { + if (!abi_match) + kfree(_link); return ret; + } /* offset by version-specific struct size and * real priv data size @@ -2542,7 +2545,7 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, { struct snd_soc_tplg_manifest *manifest, *_manifest; bool abi_match; - int err; + int ret = 0; if (tplg->pass != SOC_TPLG_PASS_MANIFEST) return 0; @@ -2555,19 +2558,19 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, _manifest = manifest; } else { abi_match = false; - err = manifest_new_ver(tplg, manifest, &_manifest); - if (err < 0) - return err; + ret = manifest_new_ver(tplg, manifest, &_manifest); + if (ret < 0) + return ret; } /* pass control to component driver for optional further init */ if (tplg->comp && tplg->ops && tplg->ops->manifest) - return tplg->ops->manifest(tplg->comp, tplg->index, _manifest); + ret = tplg->ops->manifest(tplg->comp, tplg->index, _manifest); if (!abi_match) /* free the duplicated one */ kfree(_manifest); - return 0; + return ret; } /* validate header magic, size and type */ diff --git a/sound/soc/sof/ipc.c b/sound/soc/sof/ipc.c index b63fc529b456..78aa1da7c7a9 100644 --- a/sound/soc/sof/ipc.c +++ b/sound/soc/sof/ipc.c @@ -499,7 +499,7 @@ int snd_sof_ipc_stream_posn(struct snd_soc_component *scomp, /* send IPC to the DSP */ err = sof_ipc_tx_message(sdev->ipc, - stream.hdr.cmd, &stream, sizeof(stream), &posn, + stream.hdr.cmd, &stream, sizeof(stream), posn, sizeof(*posn)); if (err < 0) { dev_err(sdev->dev, "error: failed to get stream %d position\n", diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 30bcd5d3a32a..10eb4b8e8e7e 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -1543,20 +1543,20 @@ static int stm32_sai_sub_probe(struct platform_device *pdev) return ret; } - ret = devm_snd_soc_register_component(&pdev->dev, &stm32_component, - &sai->cpu_dai_drv, 1); + ret = snd_dmaengine_pcm_register(&pdev->dev, conf, 0); + if (ret) { + dev_err(&pdev->dev, "Could not register pcm dma\n"); + return ret; + } + + ret = snd_soc_register_component(&pdev->dev, &stm32_component, + &sai->cpu_dai_drv, 1); if (ret) return ret; if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) conf = &stm32_sai_pcm_config_spdif; - ret = devm_snd_dmaengine_pcm_register(&pdev->dev, conf, 0); - if (ret) { - dev_err(&pdev->dev, "Could not register pcm dma\n"); - return ret; - } - return 0; } @@ -1565,6 +1565,8 @@ static int stm32_sai_sub_remove(struct platform_device *pdev) struct stm32_sai_sub_data *sai = dev_get_drvdata(&pdev->dev); clk_unprepare(sai->pdata->pclk); + snd_dmaengine_pcm_unregister(&pdev->dev); + snd_soc_unregister_component(&pdev->dev); return 0; } diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index ebe1685e92dd..d5e517d1c3dd 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -512,6 +512,8 @@ #define MSR_K7_HWCR 0xc0010015 #define MSR_K7_HWCR_SMMLOCK_BIT 0 #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) +#define MSR_K7_HWCR_IRPERF_EN_BIT 30 +#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 503d3f42da16..3f3f780c8c65 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -390,6 +390,7 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_EVMCS 0x00000004 +#define KVM_STATE_NESTED_MTF_PENDING 0x00000008 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index b13926432b84..8d6e8901ed2b 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,7 +1,9 @@ *.d +/_bpftool /bpftool bpftool*.8 bpf-helpers.* FEATURE-DUMP.bpftool feature libbpf +profiler.skel.h diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 46862e85fed2..9f19404f470e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -30,6 +30,7 @@ PROG COMMANDS | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** | **bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*] +| **bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs* | **bpftool** **prog help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } @@ -48,6 +49,9 @@ PROG COMMANDS | *ATTACH_TYPE* := { | **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** | } +| *METRIC* := { +| **cycles** | **instructions** | **l1d_loads** | **llc_misses** +| } DESCRIPTION @@ -189,6 +193,12 @@ DESCRIPTION not all of them can take the **ctx_in**/**ctx_out** arguments. bpftool does not perform checks on program types. + **bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs* + Profile *METRICs* for bpf program *PROG* for *DURATION* + seconds or until user hits Ctrl-C. *DURATION* is optional. + If *DURATION* is not specified, the profiling will run up to + UINT_MAX seconds. + **bpftool prog help** Print short help message. @@ -311,6 +321,15 @@ EXAMPLES **# rm /sys/fs/bpf/xdp1** +| +| **# bpftool prog profile id 337 duration 10 cycles instructions llc_misses** + +:: + 51397 run_cnt + 40176203 cycles (83.05%) + 42518139 instructions # 1.06 insns per cycle (83.39%) + 123 llc_misses # 2.89 LLC misses per million insns (83.15%) + SEE ALSO ======== **bpf**\ (2), diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index c4e810335810..f584d1fdfc64 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -59,10 +59,12 @@ LIBS = $(LIBBPF) -lelf -lz INSTALL ?= install RM ?= rm -f +CLANG ?= clang FEATURE_USER = .bpftool -FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib -FEATURE_DISPLAY = libbfd disassembler-four-args zlib +FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib \ + clang-bpf-global-var +FEATURE_DISPLAY = libbfd disassembler-four-args zlib clang-bpf-global-var check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall @@ -110,14 +112,39 @@ SRCS += $(BFD_SRCS) endif OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o +_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o + +ifeq ($(feature-clang-bpf-global-var),1) + __OBJS = $(OBJS) +else + __OBJS = $(_OBJS) +endif + +$(OUTPUT)_prog.o: prog.c + $(QUIET_CC)$(COMPILE.c) -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $< + +$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS) + +skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF) + $(QUIET_CLANG)$(CLANG) \ + -I$(srctree)/tools/include/uapi/ \ + -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \ + -g -O2 -target bpf -c $< -o $@ + +profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o + $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@ + +$(OUTPUT)prog.o: prog.c profiler.skel.h + $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< $(OUTPUT)feature.o: | zdep -$(OUTPUT)bpftool: $(OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) +$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS) $(OUTPUT)%.o: %.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< @@ -125,6 +152,7 @@ $(OUTPUT)%.o: %.c clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + $(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o $(Q)$(RM) -r -- $(OUTPUT)libbpf/ $(call QUIET_CLEAN, core-gen) $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index f2838a658339..9b0534f558f1 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -337,6 +337,7 @@ _bpftool() local PROG_TYPE='id pinned tag name' local MAP_TYPE='id pinned name' + local METRIC_TYPE='cycles instructions l1d_loads llc_misses' case $command in show|list) [[ $prev != "$command" ]] && return 0 @@ -388,7 +389,7 @@ _bpftool() _bpftool_get_prog_ids ;; name) - _bpftool_get_map_names + _bpftool_get_prog_names ;; pinned) _filedir @@ -498,9 +499,51 @@ _bpftool() tracelog) return 0 ;; + profile) + case $cword in + 3) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + return 0 + ;; + 4) + case $prev in + id) + _bpftool_get_prog_ids + ;; + name) + _bpftool_get_prog_names + ;; + pinned) + _filedir + ;; + esac + return 0 + ;; + 5) + COMPREPLY=( $( compgen -W "$METRIC_TYPE duration" -- "$cur" ) ) + return 0 + ;; + 6) + case $prev in + duration) + return 0 + ;; + *) + COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) ) + return 0 + ;; + esac + return 0 + ;; + *) + COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) ) + return 0 + ;; + esac + ;; run) - if [[ ${#words[@]} -lt 5 ]]; then - _filedir + if [[ ${#words[@]} -eq 4 ]]; then + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) return 0 fi case $prev in @@ -508,6 +551,10 @@ _bpftool() _bpftool_get_prog_ids return 0 ;; + name) + _bpftool_get_prog_names + return 0 + ;; data_in|data_out|ctx_in|ctx_out) _filedir return 0 @@ -525,7 +572,7 @@ _bpftool() *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'dump help pin attach detach \ - load loadall show list tracelog run' -- "$cur" ) ) + load loadall show list tracelog run profile' -- "$cur" ) ) ;; esac ;; @@ -713,11 +760,17 @@ _bpftool() esac ;; pin) - if [[ $prev == "$command" ]]; then - COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) - else - _filedir - fi + case $prev in + $command) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + ;; + id) + _bpftool_get_map_ids + ;; + name) + _bpftool_get_map_names + ;; + esac return 0 ;; event_pipe) @@ -844,7 +897,7 @@ _bpftool() case $command in skeleton) _filedir - ;; + ;; *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) ) @@ -944,6 +997,9 @@ _bpftool() id) _bpftool_get_prog_ids ;; + name) + _bpftool_get_prog_names + ;; pinned) _filedir ;; diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index b3745ed711ba..bcaf55b59498 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -389,6 +389,9 @@ static int dump_btf_c(const struct btf *btf, if (IS_ERR(d)) return PTR_ERR(d); + printf("#ifndef __VMLINUX_H__\n"); + printf("#define __VMLINUX_H__\n"); + printf("\n"); printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n"); printf("#endif\n\n"); @@ -412,6 +415,8 @@ static int dump_btf_c(const struct btf *btf, printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); printf("#pragma clang attribute pop\n"); printf("#endif\n"); + printf("\n"); + printf("#endif /* __VMLINUX_H__ */\n"); done: btf_dump__free(d); diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index b75b8ec5469c..f2223dbdfb0a 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -211,39 +211,14 @@ int do_pin_fd(int fd, const char *name) return err; } -int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***)) { - unsigned int id; - char *endptr; int err; int fd; - if (argc < 3) { - p_err("too few arguments, id ID and FILE path is required"); - return -1; - } else if (argc > 3) { - p_err("too many arguments"); - return -1; - } - - if (!is_prefix(*argv, "id")) { - p_err("expected 'id' got %s", *argv); - return -1; - } - NEXT_ARG(); - - id = strtoul(*argv, &endptr, 0); - if (*endptr) { - p_err("can't parse %s as ID", *argv); - return -1; - } - NEXT_ARG(); - - fd = get_fd_by_id(id); - if (fd < 0) { - p_err("can't open object by id (%u): %s", id, strerror(errno)); - return -1; - } + fd = get_fd(&argc, &argv); + if (fd < 0) + return fd; err = do_pin_fd(fd, *argv); @@ -597,3 +572,10 @@ int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what) return 0; } + +int __printf(2, 0) +print_all_levels(__maybe_unused enum libbpf_print_level level, + const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 6d41bbfc6459..06449e846e4b 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -79,13 +79,6 @@ static int do_version(int argc, char **argv) return 0; } -static int __printf(2, 0) -print_all_levels(__maybe_unused enum libbpf_print_level level, - const char *format, va_list args) -{ - return vfprintf(stderr, format, args); -} - int cmd_select(const struct cmd *cmds, int argc, char **argv, int (*help)(int argc, char **argv)) { diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 724ef9d941d3..5f6dccd43622 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -14,6 +14,8 @@ #include <linux/hashtable.h> #include <tools/libc_compat.h> +#include <bpf/libbpf.h> + #include "json_writer.h" #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) @@ -146,7 +148,7 @@ char *get_fdinfo(int fd, const char *key); int open_obj_pinned(char *path, bool quiet); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); int mount_bpffs_for_pin(const char *name); -int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); +int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); int do_pin_fd(int fd, const char *name); int do_prog(int argc, char **arg); @@ -229,4 +231,7 @@ struct tcmsg; int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb); int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, const char *devname, int ifindex); + +int print_all_levels(__maybe_unused enum libbpf_print_level level, + const char *format, va_list args); #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e6c85680b34d..693a632f6813 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1384,7 +1384,7 @@ static int do_pin(int argc, char **argv) { int err; - err = do_pin_any(argc, argv, bpf_map_get_fd_by_id); + err = do_pin_any(argc, argv, map_parse_fd); if (!err && json_output) jsonw_null(json_wtr); return err; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 1996e67a2f00..f6a5974a7b0a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -4,6 +4,7 @@ #define _GNU_SOURCE #include <errno.h> #include <fcntl.h> +#include <signal.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -11,10 +12,13 @@ #include <time.h> #include <unistd.h> #include <net/if.h> +#include <sys/ioctl.h> #include <sys/types.h> #include <sys/stat.h> +#include <sys/syscall.h> #include <linux/err.h> +#include <linux/perf_event.h> #include <linux/sizes.h> #include <bpf/bpf.h> @@ -809,7 +813,7 @@ static int do_pin(int argc, char **argv) { int err; - err = do_pin_any(argc, argv, bpf_prog_get_fd_by_id); + err = do_pin_any(argc, argv, prog_parse_fd); if (!err && json_output) jsonw_null(json_wtr); return err; @@ -1243,6 +1247,25 @@ free_data_in: return err; } +static int +get_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type) +{ + libbpf_print_fn_t print_backup; + int ret; + + ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type); + if (!ret) + return ret; + + /* libbpf_prog_type_by_name() failed, let's re-run with debug level */ + print_backup = libbpf_set_print(print_all_levels); + ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type); + libbpf_set_print(print_backup); + + return ret; +} + static int load_with_options(int argc, char **argv, bool first_prog_only) { enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC; @@ -1292,8 +1315,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) strcat(type, *argv); strcat(type, "/"); - err = libbpf_prog_type_by_name(type, &common_prog_type, - &expected_attach_type); + err = get_prog_type_by_name(type, &common_prog_type, + &expected_attach_type); free(type); if (err < 0) goto err_free_reuse_maps; @@ -1392,8 +1415,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) if (prog_type == BPF_PROG_TYPE_UNSPEC) { const char *sec_name = bpf_program__title(pos, false); - err = libbpf_prog_type_by_name(sec_name, &prog_type, - &expected_attach_type); + err = get_prog_type_by_name(sec_name, &prog_type, + &expected_attach_type); if (err < 0) goto err_close_obj; } @@ -1537,6 +1560,422 @@ static int do_loadall(int argc, char **argv) return load_with_options(argc, argv, false); } +#ifdef BPFTOOL_WITHOUT_SKELETONS + +static int do_profile(int argc, char **argv) +{ + p_err("bpftool prog profile command is not supported. Please build bpftool with clang >= 10.0.0"); + return 0; +} + +#else /* BPFTOOL_WITHOUT_SKELETONS */ + +#include "profiler.skel.h" + +struct profile_metric { + const char *name; + struct bpf_perf_event_value val; + struct perf_event_attr attr; + bool selected; + + /* calculate ratios like instructions per cycle */ + const int ratio_metric; /* 0 for N/A, 1 for index 0 (cycles) */ + const char *ratio_desc; + const float ratio_mul; +} metrics[] = { + { + .name = "cycles", + .attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .exclude_user = 1, + }, + }, + { + .name = "instructions", + .attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_INSTRUCTIONS, + .exclude_user = 1, + }, + .ratio_metric = 1, + .ratio_desc = "insns per cycle", + .ratio_mul = 1.0, + }, + { + .name = "l1d_loads", + .attr = { + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_L1D | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + .exclude_user = 1, + }, + }, + { + .name = "llc_misses", + .attr = { + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_LL | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + .exclude_user = 1 + }, + .ratio_metric = 2, + .ratio_desc = "LLC misses per million insns", + .ratio_mul = 1e6, + }, +}; + +static __u64 profile_total_count; + +#define MAX_NUM_PROFILE_METRICS 4 + +static int profile_parse_metrics(int argc, char **argv) +{ + unsigned int metric_cnt; + int selected_cnt = 0; + unsigned int i; + + metric_cnt = sizeof(metrics) / sizeof(struct profile_metric); + + while (argc > 0) { + for (i = 0; i < metric_cnt; i++) { + if (is_prefix(argv[0], metrics[i].name)) { + if (!metrics[i].selected) + selected_cnt++; + metrics[i].selected = true; + break; + } + } + if (i == metric_cnt) { + p_err("unknown metric %s", argv[0]); + return -1; + } + NEXT_ARG(); + } + if (selected_cnt > MAX_NUM_PROFILE_METRICS) { + p_err("too many (%d) metrics, please specify no more than %d metrics at at time", + selected_cnt, MAX_NUM_PROFILE_METRICS); + return -1; + } + return selected_cnt; +} + +static void profile_read_values(struct profiler_bpf *obj) +{ + __u32 m, cpu, num_cpu = obj->rodata->num_cpu; + int reading_map_fd, count_map_fd; + __u64 counts[num_cpu]; + __u32 key = 0; + int err; + + reading_map_fd = bpf_map__fd(obj->maps.accum_readings); + count_map_fd = bpf_map__fd(obj->maps.counts); + if (reading_map_fd < 0 || count_map_fd < 0) { + p_err("failed to get fd for map"); + return; + } + + err = bpf_map_lookup_elem(count_map_fd, &key, counts); + if (err) { + p_err("failed to read count_map: %s", strerror(errno)); + return; + } + + profile_total_count = 0; + for (cpu = 0; cpu < num_cpu; cpu++) + profile_total_count += counts[cpu]; + + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + struct bpf_perf_event_value values[num_cpu]; + + if (!metrics[m].selected) + continue; + + err = bpf_map_lookup_elem(reading_map_fd, &key, values); + if (err) { + p_err("failed to read reading_map: %s", + strerror(errno)); + return; + } + for (cpu = 0; cpu < num_cpu; cpu++) { + metrics[m].val.counter += values[cpu].counter; + metrics[m].val.enabled += values[cpu].enabled; + metrics[m].val.running += values[cpu].running; + } + key++; + } +} + +static void profile_print_readings_json(void) +{ + __u32 m; + + jsonw_start_array(json_wtr); + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + if (!metrics[m].selected) + continue; + jsonw_start_object(json_wtr); + jsonw_string_field(json_wtr, "metric", metrics[m].name); + jsonw_lluint_field(json_wtr, "run_cnt", profile_total_count); + jsonw_lluint_field(json_wtr, "value", metrics[m].val.counter); + jsonw_lluint_field(json_wtr, "enabled", metrics[m].val.enabled); + jsonw_lluint_field(json_wtr, "running", metrics[m].val.running); + + jsonw_end_object(json_wtr); + } + jsonw_end_array(json_wtr); +} + +static void profile_print_readings_plain(void) +{ + __u32 m; + + printf("\n%18llu %-20s\n", profile_total_count, "run_cnt"); + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + struct bpf_perf_event_value *val = &metrics[m].val; + int r; + + if (!metrics[m].selected) + continue; + printf("%18llu %-20s", val->counter, metrics[m].name); + + r = metrics[m].ratio_metric - 1; + if (r >= 0 && metrics[r].selected && + metrics[r].val.counter > 0) { + printf("# %8.2f %-30s", + val->counter * metrics[m].ratio_mul / + metrics[r].val.counter, + metrics[m].ratio_desc); + } else { + printf("%-41s", ""); + } + + if (val->enabled > val->running) + printf("(%4.2f%%)", + val->running * 100.0 / val->enabled); + printf("\n"); + } +} + +static void profile_print_readings(void) +{ + if (json_output) + profile_print_readings_json(); + else + profile_print_readings_plain(); +} + +static char *profile_target_name(int tgt_fd) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_func_info *func_info; + const struct btf_type *t; + char *name = NULL; + struct btf *btf; + + info_linear = bpf_program__get_prog_info_linear( + tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); + if (IS_ERR_OR_NULL(info_linear)) { + p_err("failed to get info_linear for prog FD %d", tgt_fd); + return NULL; + } + + if (info_linear->info.btf_id == 0 || + btf__get_from_id(info_linear->info.btf_id, &btf)) { + p_err("prog FD %d doesn't have valid btf", tgt_fd); + goto out; + } + + func_info = (struct bpf_func_info *)(info_linear->info.func_info); + t = btf__type_by_id(btf, func_info[0].type_id); + if (!t) { + p_err("btf %d doesn't have type %d", + info_linear->info.btf_id, func_info[0].type_id); + goto out; + } + name = strdup(btf__name_by_offset(btf, t->name_off)); +out: + free(info_linear); + return name; +} + +static struct profiler_bpf *profile_obj; +static int profile_tgt_fd = -1; +static char *profile_tgt_name; +static int *profile_perf_events; +static int profile_perf_event_cnt; + +static void profile_close_perf_events(struct profiler_bpf *obj) +{ + int i; + + for (i = profile_perf_event_cnt - 1; i >= 0; i--) + close(profile_perf_events[i]); + + free(profile_perf_events); + profile_perf_event_cnt = 0; +} + +static int profile_open_perf_events(struct profiler_bpf *obj) +{ + unsigned int cpu, m; + int map_fd, pmu_fd; + + profile_perf_events = calloc( + sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); + if (!profile_perf_events) { + p_err("failed to allocate memory for perf_event array: %s", + strerror(errno)); + return -1; + } + map_fd = bpf_map__fd(obj->maps.events); + if (map_fd < 0) { + p_err("failed to get fd for events map"); + return -1; + } + + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + if (!metrics[m].selected) + continue; + for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) { + pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr, + -1/*pid*/, cpu, -1/*group_fd*/, 0); + if (pmu_fd < 0 || + bpf_map_update_elem(map_fd, &profile_perf_event_cnt, + &pmu_fd, BPF_ANY) || + ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { + p_err("failed to create event %s on cpu %d", + metrics[m].name, cpu); + return -1; + } + profile_perf_events[profile_perf_event_cnt++] = pmu_fd; + } + } + return 0; +} + +static void profile_print_and_cleanup(void) +{ + profile_close_perf_events(profile_obj); + profile_read_values(profile_obj); + profile_print_readings(); + profiler_bpf__destroy(profile_obj); + + close(profile_tgt_fd); + free(profile_tgt_name); +} + +static void int_exit(int signo) +{ + profile_print_and_cleanup(); + exit(0); +} + +static int do_profile(int argc, char **argv) +{ + int num_metric, num_cpu, err = -1; + struct bpf_program *prog; + unsigned long duration; + char *endptr; + + /* we at least need two args for the prog and one metric */ + if (!REQ_ARGS(3)) + return -EINVAL; + + /* parse target fd */ + profile_tgt_fd = prog_parse_fd(&argc, &argv); + if (profile_tgt_fd < 0) { + p_err("failed to parse fd"); + return -1; + } + + /* parse profiling optional duration */ + if (argc > 2 && is_prefix(argv[0], "duration")) { + NEXT_ARG(); + duration = strtoul(*argv, &endptr, 0); + if (*endptr) + usage(); + NEXT_ARG(); + } else { + duration = UINT_MAX; + } + + num_metric = profile_parse_metrics(argc, argv); + if (num_metric <= 0) + goto out; + + num_cpu = libbpf_num_possible_cpus(); + if (num_cpu <= 0) { + p_err("failed to identify number of CPUs"); + goto out; + } + + profile_obj = profiler_bpf__open(); + if (!profile_obj) { + p_err("failed to open and/or load BPF object"); + goto out; + } + + profile_obj->rodata->num_cpu = num_cpu; + profile_obj->rodata->num_metric = num_metric; + + /* adjust map sizes */ + bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu); + bpf_map__resize(profile_obj->maps.fentry_readings, num_metric); + bpf_map__resize(profile_obj->maps.accum_readings, num_metric); + bpf_map__resize(profile_obj->maps.counts, 1); + + /* change target name */ + profile_tgt_name = profile_target_name(profile_tgt_fd); + if (!profile_tgt_name) + goto out; + + bpf_object__for_each_program(prog, profile_obj->obj) { + err = bpf_program__set_attach_target(prog, profile_tgt_fd, + profile_tgt_name); + if (err) { + p_err("failed to set attach target\n"); + goto out; + } + } + + set_max_rlimit(); + err = profiler_bpf__load(profile_obj); + if (err) { + p_err("failed to load profile_obj"); + goto out; + } + + err = profile_open_perf_events(profile_obj); + if (err) + goto out; + + err = profiler_bpf__attach(profile_obj); + if (err) { + p_err("failed to attach profile_obj"); + goto out; + } + signal(SIGINT, int_exit); + + sleep(duration); + profile_print_and_cleanup(); + return 0; + +out: + profile_close_perf_events(profile_obj); + if (profile_obj) + profiler_bpf__destroy(profile_obj); + close(profile_tgt_fd); + free(profile_tgt_name); + return err; +} + +#endif /* BPFTOOL_WITHOUT_SKELETONS */ + static int do_help(int argc, char **argv) { if (json_output) { @@ -1560,6 +1999,7 @@ static int do_help(int argc, char **argv) " [data_out FILE [data_size_out L]] \\\n" " [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n" " [repeat N]\n" + " %s %s profile PROG [duration DURATION] METRICs\n" " %s %s tracelog\n" " %s %s help\n" "\n" @@ -1577,12 +2017,13 @@ static int do_help(int argc, char **argv) " struct_ops | fentry | fexit | freplace }\n" " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" " flow_dissector }\n" + " METRIC := { cycles | instructions | l1d_loads | llc_misses }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2]); return 0; } @@ -1599,6 +2040,7 @@ static const struct cmd cmds[] = { { "detach", do_detach }, { "tracelog", do_tracelog }, { "run", do_run }, + { "profile", do_profile }, { 0 } }; diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c new file mode 100644 index 000000000000..20034c12f7c5 --- /dev/null +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include "profiler.h" +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* map of perf event fds, num_cpu * num_metric entries */ +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(int)); +} events SEC(".maps"); + +/* readings at fentry */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); +} fentry_readings SEC(".maps"); + +/* accumulated readings */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); +} accum_readings SEC(".maps"); + +/* sample counts, one per cpu */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u64)); +} counts SEC(".maps"); + +const volatile __u32 num_cpu = 1; +const volatile __u32 num_metric = 1; +#define MAX_NUM_MATRICS 4 + +SEC("fentry/XXX") +int BPF_PROG(fentry_XXX) +{ + struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS]; + u32 key = bpf_get_smp_processor_id(); + u32 i; + + /* look up before reading, to reduce error */ + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + u32 flag = i; + + ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag); + if (!ptrs[i]) + return 0; + } + + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + struct bpf_perf_event_value reading; + int err; + + err = bpf_perf_event_read_value(&events, key, &reading, + sizeof(reading)); + if (err) + return 0; + *(ptrs[i]) = reading; + key += num_cpu; + } + + return 0; +} + +static inline void +fexit_update_maps(u32 id, struct bpf_perf_event_value *after) +{ + struct bpf_perf_event_value *before, diff, *accum; + + before = bpf_map_lookup_elem(&fentry_readings, &id); + /* only account samples with a valid fentry_reading */ + if (before && before->counter) { + struct bpf_perf_event_value *accum; + + diff.counter = after->counter - before->counter; + diff.enabled = after->enabled - before->enabled; + diff.running = after->running - before->running; + + accum = bpf_map_lookup_elem(&accum_readings, &id); + if (accum) { + accum->counter += diff.counter; + accum->enabled += diff.enabled; + accum->running += diff.running; + } + } +} + +SEC("fexit/XXX") +int BPF_PROG(fexit_XXX) +{ + struct bpf_perf_event_value readings[MAX_NUM_MATRICS]; + u32 cpu = bpf_get_smp_processor_id(); + u32 i, one = 1, zero = 0; + int err; + u64 *count; + + /* read all events before updating the maps, to reduce error */ + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + err = bpf_perf_event_read_value(&events, cpu + i * num_cpu, + readings + i, sizeof(*readings)); + if (err) + return 0; + } + count = bpf_map_lookup_elem(&counts, &zero); + if (count) { + *count += 1; + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) + fexit_update_maps(i, &readings[i]); + } + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h new file mode 100644 index 000000000000..1f767e9510f7 --- /dev/null +++ b/tools/bpf/bpftool/skeleton/profiler.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __PROFILER_H +#define __PROFILER_H + +/* useful typedefs from vmlinux.h */ + +typedef signed char __s8; +typedef unsigned char __u8; +typedef short int __s16; +typedef short unsigned int __u16; +typedef int __s32; +typedef unsigned int __u32; +typedef long long int __s64; +typedef long long unsigned int __u64; + +typedef __s8 s8; +typedef __u8 u8; +typedef __s16 s16; +typedef __u16 u16; +typedef __s32 s32; +typedef __u32 u32; +typedef __s64 s64; +typedef __u64 u64; + +enum { + false = 0, + true = 1, +}; + +#ifdef __CHECKER__ +#define __bitwise__ __attribute__((bitwise)) +#else +#define __bitwise__ +#endif + +typedef __u16 __bitwise__ __le16; +typedef __u16 __bitwise__ __be16; +typedef __u32 __bitwise__ __le32; +typedef __u32 __bitwise__ __be32; +typedef __u64 __bitwise__ __le64; +typedef __u64 __bitwise__ __be64; + +typedef __u16 __bitwise__ __sum16; +typedef __u32 __bitwise__ __wsum; + +#endif /* __PROFILER_H */ diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index 48a39f72fadf..1f18a409f044 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -5,9 +5,7 @@ #include "runqslower.h" #define TASK_RUNNING 0 - -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +#define BPF_F_CURRENT_CPU 0xffffffffULL const volatile __u64 min_us = 0; const volatile pid_t targ_pid = 0; diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 7ac0d8088565..ab8e89a7009c 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -67,7 +67,8 @@ FILES= \ test-llvm.bin \ test-llvm-version.bin \ test-libaio.bin \ - test-libzstd.bin + test-libzstd.bin \ + test-clang-bpf-global-var.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -75,6 +76,7 @@ CC ?= $(CROSS_COMPILE)gcc CXX ?= $(CROSS_COMPILE)g++ PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config +CLANG ?= clang all: $(FILES) @@ -321,6 +323,11 @@ $(OUTPUT)test-libaio.bin: $(OUTPUT)test-libzstd.bin: $(BUILD) -lzstd +$(OUTPUT)test-clang-bpf-global-var.bin: + $(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \ + grep BTF_KIND_VAR + + ############################### clean: diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c new file mode 100644 index 000000000000..221f1481d52e --- /dev/null +++ b/tools/build/feature/test-clang-bpf-global-var.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +volatile int global_value_for_test = 1; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 906e9f2752db..5d01c5c7e598 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -73,7 +73,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[0]; /* Arbitrary size */ + __u8 data[]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { @@ -210,6 +210,7 @@ enum bpf_attach_type { BPF_TRACE_RAW_TP, BPF_TRACE_FENTRY, BPF_TRACE_FEXIT, + BPF_MODIFY_RETURN, __MAX_BPF_ATTACH_TYPE }; @@ -325,44 +326,46 @@ enum bpf_attach_type { #define BPF_PSEUDO_CALL 1 /* flags for BPF_MAP_UPDATE_ELEM command */ -#define BPF_ANY 0 /* create new element or update existing */ -#define BPF_NOEXIST 1 /* create new element if it didn't exist */ -#define BPF_EXIST 2 /* update existing element */ -#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ +enum { + BPF_ANY = 0, /* create new element or update existing */ + BPF_NOEXIST = 1, /* create new element if it didn't exist */ + BPF_EXIST = 2, /* update existing element */ + BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ +}; /* flags for BPF_MAP_CREATE command */ -#define BPF_F_NO_PREALLOC (1U << 0) +enum { + BPF_F_NO_PREALLOC = (1U << 0), /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list * which can scale and perform better. * Note, the LRU nodes (including free nodes) cannot be moved * across different LRU lists. */ -#define BPF_F_NO_COMMON_LRU (1U << 1) + BPF_F_NO_COMMON_LRU = (1U << 1), /* Specify numa node during map creation */ -#define BPF_F_NUMA_NODE (1U << 2) - -#define BPF_OBJ_NAME_LEN 16U + BPF_F_NUMA_NODE = (1U << 2), /* Flags for accessing BPF object from syscall side. */ -#define BPF_F_RDONLY (1U << 3) -#define BPF_F_WRONLY (1U << 4) + BPF_F_RDONLY = (1U << 3), + BPF_F_WRONLY = (1U << 4), /* Flag for stack_map, store build_id+offset instead of pointer */ -#define BPF_F_STACK_BUILD_ID (1U << 5) + BPF_F_STACK_BUILD_ID = (1U << 5), /* Zero-initialize hash function seed. This should only be used for testing. */ -#define BPF_F_ZERO_SEED (1U << 6) + BPF_F_ZERO_SEED = (1U << 6), /* Flags for accessing BPF object from program side. */ -#define BPF_F_RDONLY_PROG (1U << 7) -#define BPF_F_WRONLY_PROG (1U << 8) + BPF_F_RDONLY_PROG = (1U << 7), + BPF_F_WRONLY_PROG = (1U << 8), /* Clone map from listener for newly accepted socket */ -#define BPF_F_CLONE (1U << 9) + BPF_F_CLONE = (1U << 9), /* Enable memory-mapping BPF map */ -#define BPF_F_MMAPABLE (1U << 10) + BPF_F_MMAPABLE = (1U << 10), +}; /* Flags for BPF_PROG_QUERY. */ @@ -391,6 +394,8 @@ struct bpf_stack_build_id { }; }; +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -2909,6 +2914,42 @@ union bpf_attr { * of sizeof(struct perf_branch_entry). * * **-ENOENT** if architecture does not support branch records. + * + * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * Description + * Returns 0 on success, values for *pid* and *tgid* as seen from the current + * *namespace* will be returned in *nsdata*. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if dev and inum supplied don't match dev_t and inode number + * with nsfs of current task, or if dev conversion to dev_t lost high bits. + * + * **-ENOENT** if pidns does not exists for the current task. + * + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct xdp_buff. + * + * This helper is similar to **bpf_perf_eventoutput**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3030,7 +3071,9 @@ union bpf_attr { FN(tcp_send_ack), \ FN(send_signal_thread), \ FN(jiffies64), \ - FN(read_branch_records), + FN(read_branch_records), \ + FN(get_ns_current_pid_tgid), \ + FN(xdp_output), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3045,72 +3088,100 @@ enum bpf_func_id { /* All flags used by eBPF helper functions, placed here. */ /* BPF_FUNC_skb_store_bytes flags. */ -#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) -#define BPF_F_INVALIDATE_HASH (1ULL << 1) +enum { + BPF_F_RECOMPUTE_CSUM = (1ULL << 0), + BPF_F_INVALIDATE_HASH = (1ULL << 1), +}; /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. * First 4 bits are for passing the header field size. */ -#define BPF_F_HDR_FIELD_MASK 0xfULL +enum { + BPF_F_HDR_FIELD_MASK = 0xfULL, +}; /* BPF_FUNC_l4_csum_replace flags. */ -#define BPF_F_PSEUDO_HDR (1ULL << 4) -#define BPF_F_MARK_MANGLED_0 (1ULL << 5) -#define BPF_F_MARK_ENFORCE (1ULL << 6) +enum { + BPF_F_PSEUDO_HDR = (1ULL << 4), + BPF_F_MARK_MANGLED_0 = (1ULL << 5), + BPF_F_MARK_ENFORCE = (1ULL << 6), +}; /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -#define BPF_F_INGRESS (1ULL << 0) +enum { + BPF_F_INGRESS = (1ULL << 0), +}; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ -#define BPF_F_TUNINFO_IPV6 (1ULL << 0) +enum { + BPF_F_TUNINFO_IPV6 = (1ULL << 0), +}; /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ -#define BPF_F_SKIP_FIELD_MASK 0xffULL -#define BPF_F_USER_STACK (1ULL << 8) +enum { + BPF_F_SKIP_FIELD_MASK = 0xffULL, + BPF_F_USER_STACK = (1ULL << 8), /* flags used by BPF_FUNC_get_stackid only. */ -#define BPF_F_FAST_STACK_CMP (1ULL << 9) -#define BPF_F_REUSE_STACKID (1ULL << 10) + BPF_F_FAST_STACK_CMP = (1ULL << 9), + BPF_F_REUSE_STACKID = (1ULL << 10), /* flags used by BPF_FUNC_get_stack only. */ -#define BPF_F_USER_BUILD_ID (1ULL << 11) + BPF_F_USER_BUILD_ID = (1ULL << 11), +}; /* BPF_FUNC_skb_set_tunnel_key flags. */ -#define BPF_F_ZERO_CSUM_TX (1ULL << 1) -#define BPF_F_DONT_FRAGMENT (1ULL << 2) -#define BPF_F_SEQ_NUMBER (1ULL << 3) +enum { + BPF_F_ZERO_CSUM_TX = (1ULL << 1), + BPF_F_DONT_FRAGMENT = (1ULL << 2), + BPF_F_SEQ_NUMBER = (1ULL << 3), +}; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. */ -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +enum { + BPF_F_INDEX_MASK = 0xffffffffULL, + BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, /* BPF_FUNC_perf_event_output for sk_buff input context. */ -#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + BPF_F_CTXLEN_MASK = (0xfffffULL << 32), +}; /* Current network namespace */ -#define BPF_F_CURRENT_NETNS (-1L) +enum { + BPF_F_CURRENT_NETNS = (-1L), +}; /* BPF_FUNC_skb_adjust_room flags. */ -#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) +enum { + BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), + BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), + BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), +}; -#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff -#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 +enum { + BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff, + BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56, +}; -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) -#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) -#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) #define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ BPF_ADJ_ROOM_ENCAP_L2_MASK) \ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) /* BPF_FUNC_sysctl_get_name flags. */ -#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) +enum { + BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), +}; /* BPF_FUNC_sk_storage_get flags */ -#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +enum { + BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), +}; /* BPF_FUNC_read_branch_records flags. */ -#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0) +enum { + BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0), +}; /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { @@ -3176,6 +3247,7 @@ struct __sk_buff { __u32 wire_len; __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); + __u32 gso_size; }; struct bpf_tunnel_key { @@ -3528,13 +3600,14 @@ struct bpf_sock_ops { }; /* Definitions for bpf_sock_ops_cb_flags */ -#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) -#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently - * supported cb flags - */ +enum { + BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0), + BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), + BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), + BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), +/* Mask of all currently supported cb flags */ + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, +}; /* List of known BPF sock_ops operators. * New entries can only be added at the end @@ -3613,8 +3686,10 @@ enum { BPF_TCP_MAX_STATES /* Leave at the end! */ }; -#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +enum { + TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ + TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ +}; struct bpf_perf_event_value { __u64 counter; @@ -3622,12 +3697,16 @@ struct bpf_perf_event_value { __u64 running; }; -#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) -#define BPF_DEVCG_ACC_READ (1ULL << 1) -#define BPF_DEVCG_ACC_WRITE (1ULL << 2) +enum { + BPF_DEVCG_ACC_MKNOD = (1ULL << 0), + BPF_DEVCG_ACC_READ = (1ULL << 1), + BPF_DEVCG_ACC_WRITE = (1ULL << 2), +}; -#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) -#define BPF_DEVCG_DEV_CHAR (1ULL << 1) +enum { + BPF_DEVCG_DEV_BLOCK = (1ULL << 0), + BPF_DEVCG_DEV_CHAR = (1ULL << 1), +}; struct bpf_cgroup_dev_ctx { /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ @@ -3643,8 +3722,10 @@ struct bpf_raw_tracepoint_args { /* DIRECT: Skip the FIB rules and go to FIB table associated with device * OUTPUT: Do lookup from egress perspective; default is ingress */ -#define BPF_FIB_LOOKUP_DIRECT (1U << 0) -#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) +enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), +}; enum { BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ @@ -3716,9 +3797,11 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; -#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) +enum { + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0), + BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1), + BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2), +}; struct bpf_flow_keys { __u16 nhoff; @@ -3784,4 +3867,8 @@ struct bpf_sockopt { __s32 retval; }; +struct bpf_pidns_info { + __u32 pid; + __u32 tgid; +}; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h index 91fa51a9c31d..91fa51a9c31d 100644 --- a/tools/testing/selftests/bpf/include/uapi/linux/types.h +++ b/tools/include/uapi/linux/types.h diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index b0dafe8b4ebc..b0c9ae5c73b5 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -49,7 +49,8 @@ #if defined(bpf_target_x86) -#ifdef __KERNEL__ +#if defined(__KERNEL__) || defined(__VMLINUX_H__) + #define PT_REGS_PARM1(x) ((x)->di) #define PT_REGS_PARM2(x) ((x)->si) #define PT_REGS_PARM3(x) ((x)->dx) @@ -60,7 +61,20 @@ #define PT_REGS_RC(x) ((x)->ax) #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->ip) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip) + #else + #ifdef __i386__ /* i386 kernel is built with -mregparm=3 */ #define PT_REGS_PARM1(x) ((x)->eax) @@ -73,7 +87,20 @@ #define PT_REGS_RC(x) ((x)->eax) #define PT_REGS_SP(x) ((x)->esp) #define PT_REGS_IP(x) ((x)->eip) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx) +#define PT_REGS_PARM4_CORE(x) 0 +#define PT_REGS_PARM5_CORE(x) 0 +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip) + #else + #define PT_REGS_PARM1(x) ((x)->rdi) #define PT_REGS_PARM2(x) ((x)->rsi) #define PT_REGS_PARM3(x) ((x)->rdx) @@ -84,6 +111,18 @@ #define PT_REGS_RC(x) ((x)->rax) #define PT_REGS_SP(x) ((x)->rsp) #define PT_REGS_IP(x) ((x)->rip) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip) + #endif #endif @@ -104,6 +143,17 @@ struct pt_regs; #define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) #define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), grps[14]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), pdw.addr) + #elif defined(bpf_target_arm) #define PT_REGS_PARM1(x) ((x)->uregs[0]) @@ -117,6 +167,17 @@ struct pt_regs; #define PT_REGS_SP(x) ((x)->uregs[13]) #define PT_REGS_IP(x) ((x)->uregs[12]) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12]) + #elif defined(bpf_target_arm64) /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ @@ -134,6 +195,17 @@ struct pt_regs; #define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) #define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc) + #elif defined(bpf_target_mips) #define PT_REGS_PARM1(x) ((x)->regs[4]) @@ -147,6 +219,17 @@ struct pt_regs; #define PT_REGS_SP(x) ((x)->regs[29]) #define PT_REGS_IP(x) ((x)->cp0_epc) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc) + #elif defined(bpf_target_powerpc) #define PT_REGS_PARM1(x) ((x)->gpr[3]) @@ -158,6 +241,15 @@ struct pt_regs; #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->nip) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip) + #elif defined(bpf_target_sparc) #define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) @@ -169,11 +261,22 @@ struct pt_regs; #define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) #define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP]) + /* Should this also be a bpf_target check for the sparc case? */ #if defined(__arch64__) #define PT_REGS_IP(x) ((x)->tpc) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc) #else #define PT_REGS_IP(x) ((x)->pc) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) #endif #endif @@ -192,4 +295,122 @@ struct pt_regs; (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) #endif +#define ___bpf_concat(a, b) a ## b +#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) +#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N +#define ___bpf_narg(...) \ + ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define ___bpf_empty(...) \ + ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) + +#define ___bpf_ctx_cast0() ctx +#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] +#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] +#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] +#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] +#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] +#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] +#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] +#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] +#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] +#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] +#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] +#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] +#define ___bpf_ctx_cast(args...) \ + ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) + +/* + * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and + * similar kinds of BPF programs, that accept input arguments as a single + * pointer to untyped u64 array, where each u64 can actually be a typed + * pointer or integer of different size. Instead of requring user to write + * manual casts and work with array elements by index, BPF_PROG macro + * allows user to declare a list of named and typed input arguments in the + * same syntax as for normal C function. All the casting is hidden and + * performed transparently, while user code can just assume working with + * function arguments of specified type and name. + * + * Original raw context argument is preserved as well as 'ctx' argument. + * This is useful when using BPF helpers that expect original context + * as one of the parameters (e.g., for bpf_perf_event_output()). + */ +#define BPF_PROG(name, args...) \ +name(unsigned long long *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(unsigned long long *ctx, ##args); \ +typeof(name(0)) name(unsigned long long *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_ctx_cast(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(unsigned long long *ctx, ##args) + +struct pt_regs; + +#define ___bpf_kprobe_args0() ctx +#define ___bpf_kprobe_args1(x) \ + ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) +#define ___bpf_kprobe_args2(x, args...) \ + ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) +#define ___bpf_kprobe_args3(x, args...) \ + ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) +#define ___bpf_kprobe_args4(x, args...) \ + ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) +#define ___bpf_kprobe_args5(x, args...) \ + ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args(args...) \ + ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) + +/* + * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for + * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific + * low-level way of getting kprobe input arguments from struct pt_regs, and + * provides a familiar typed and named function arguments syntax and + * semantics of accessing kprobe input paremeters. + * + * Original struct pt_regs* context is preserved as 'ctx' argument. This might + * be necessary when using BPF helpers like bpf_perf_event_output(). + */ +#define BPF_KPROBE(name, args...) \ +name(struct pt_regs *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_kprobe_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args) + +#define ___bpf_kretprobe_args0() ctx +#define ___bpf_kretprobe_args1(x) \ + ___bpf_kretprobe_args0(), (void *)PT_REGS_RET(ctx) +#define ___bpf_kretprobe_args(args...) \ + ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) + +/* + * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional + * return value (in addition to `struct pt_regs *ctx`), but no input + * arguments, because they will be clobbered by the time probed function + * returns. + */ +#define BPF_KRETPROBE(name, args...) \ +name(struct pt_regs *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_kretprobe_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) + #endif diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index bd09ed1710f1..0c28ee82834b 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -916,13 +916,13 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, /* enumerators share namespace with typedef idents */ dup_cnt = btf_dump_name_dups(d, d->ident_names, name); if (dup_cnt > 1) { - btf_dump_printf(d, "\n%s%s___%zu = %d,", + btf_dump_printf(d, "\n%s%s___%zu = %u,", pfx(lvl + 1), name, dup_cnt, - (__s32)v->val); + (__u32)v->val); } else { - btf_dump_printf(d, "\n%s%s = %d,", + btf_dump_printf(d, "\n%s%s = %u,", pfx(lvl + 1), name, - (__s32)v->val); + (__u32)v->val); } } btf_dump_printf(d, "\n%s}", pfx(lvl)); @@ -1030,7 +1030,7 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts)) return -EINVAL; - fname = OPTS_GET(opts, field_name, NULL); + fname = OPTS_GET(opts, field_name, ""); lvl = OPTS_GET(opts, indent_level, 0); btf_dump_emit_type_decl(d, id, fname, lvl); return 0; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 996162801f7a..085e41f9b68e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2284,9 +2284,16 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) } } -static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) +static bool libbpf_needs_btf(const struct bpf_object *obj) { - return obj->efile.st_ops_shndx >= 0 || obj->nr_extern > 0; + return obj->efile.btf_maps_shndx >= 0 || + obj->efile.st_ops_shndx >= 0 || + obj->nr_extern > 0; +} + +static bool kernel_needs_btf(const struct bpf_object *obj) +{ + return obj->efile.st_ops_shndx >= 0; } static int bpf_object__init_btf(struct bpf_object *obj, @@ -2322,7 +2329,7 @@ static int bpf_object__init_btf(struct bpf_object *obj, } } out: - if (err && bpf_object__is_btf_mandatory(obj)) { + if (err && libbpf_needs_btf(obj)) { pr_warn("BTF is required, but is missing or corrupted.\n"); return err; } @@ -2346,7 +2353,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) btf_ext__free(obj->btf_ext); obj->btf_ext = NULL; - if (bpf_object__is_btf_mandatory(obj)) { + if (libbpf_needs_btf(obj)) { pr_warn("BTF is required, but is missing or corrupted.\n"); return -ENOENT; } @@ -2410,7 +2417,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) obj->btf_ext = NULL; } - if (bpf_object__is_btf_mandatory(obj)) + if (kernel_needs_btf(obj)) return err; } return 0; @@ -3866,6 +3873,10 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, if (str_is_empty(targ_name)) continue; + t = skip_mods_and_typedefs(targ_btf, i, NULL); + if (!btf_is_composite(t) && !btf_is_array(t)) + continue; + targ_essent_len = bpf_core_essential_name_len(targ_name); if (targ_essent_len != local_essent_len) continue; @@ -6288,6 +6299,10 @@ static const struct bpf_sec_def section_defs[] = { .expected_attach_type = BPF_TRACE_FENTRY, .is_attach_btf = true, .attach_fn = attach_trace), + SEC_DEF("fmod_ret/", TRACING, + .expected_attach_type = BPF_MODIFY_RETURN, + .is_attach_btf = true, + .attach_fn = attach_trace), SEC_DEF("fexit/", TRACING, .expected_attach_type = BPF_TRACE_FEXIT, .is_attach_btf = true, @@ -6931,6 +6946,8 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, struct bpf_link { int (*detach)(struct bpf_link *link); int (*destroy)(struct bpf_link *link); + char *pin_path; /* NULL, if not pinned */ + int fd; /* hook FD, -1 if not applicable */ bool disconnected; }; @@ -6960,26 +6977,109 @@ int bpf_link__destroy(struct bpf_link *link) err = link->detach(link); if (link->destroy) link->destroy(link); + if (link->pin_path) + free(link->pin_path); free(link); return err; } -struct bpf_link_fd { - struct bpf_link link; /* has to be at the top of struct */ - int fd; /* hook FD */ -}; +int bpf_link__fd(const struct bpf_link *link) +{ + return link->fd; +} + +const char *bpf_link__pin_path(const struct bpf_link *link) +{ + return link->pin_path; +} + +static int bpf_link__detach_fd(struct bpf_link *link) +{ + return close(link->fd); +} + +struct bpf_link *bpf_link__open(const char *path) +{ + struct bpf_link *link; + int fd; + + fd = bpf_obj_get(path); + if (fd < 0) { + fd = -errno; + pr_warn("failed to open link at %s: %d\n", path, fd); + return ERR_PTR(fd); + } + + link = calloc(1, sizeof(*link)); + if (!link) { + close(fd); + return ERR_PTR(-ENOMEM); + } + link->detach = &bpf_link__detach_fd; + link->fd = fd; + + link->pin_path = strdup(path); + if (!link->pin_path) { + bpf_link__destroy(link); + return ERR_PTR(-ENOMEM); + } + + return link; +} + +int bpf_link__pin(struct bpf_link *link, const char *path) +{ + int err; + + if (link->pin_path) + return -EBUSY; + err = make_parent_dir(path); + if (err) + return err; + err = check_path(path); + if (err) + return err; + + link->pin_path = strdup(path); + if (!link->pin_path) + return -ENOMEM; + + if (bpf_obj_pin(link->fd, link->pin_path)) { + err = -errno; + zfree(&link->pin_path); + return err; + } + + pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); + return 0; +} + +int bpf_link__unpin(struct bpf_link *link) +{ + int err; + + if (!link->pin_path) + return -EINVAL; + + err = unlink(link->pin_path); + if (err != 0) + return -errno; + + pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); + zfree(&link->pin_path); + return 0; +} static int bpf_link__detach_perf_event(struct bpf_link *link) { - struct bpf_link_fd *l = (void *)link; int err; - err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0); + err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0); if (err) err = -errno; - close(l->fd); + close(link->fd); return err; } @@ -6987,7 +7087,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd) { char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; + struct bpf_link *link; int prog_fd, err; if (pfd < 0) { @@ -7005,7 +7105,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.detach = &bpf_link__detach_perf_event; + link->detach = &bpf_link__detach_perf_event; link->fd = pfd; if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { @@ -7024,7 +7124,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return ERR_PTR(err); } - return (struct bpf_link *)link; + return link; } /* @@ -7312,18 +7412,11 @@ out: return link; } -static int bpf_link__detach_fd(struct bpf_link *link) -{ - struct bpf_link_fd *l = (void *)link; - - return close(l->fd); -} - struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, const char *tp_name) { char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; + struct bpf_link *link; int prog_fd, pfd; prog_fd = bpf_program__fd(prog); @@ -7336,7 +7429,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.detach = &bpf_link__detach_fd; + link->detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); if (pfd < 0) { @@ -7348,7 +7441,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return ERR_PTR(pfd); } link->fd = pfd; - return (struct bpf_link *)link; + return link; } static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, @@ -7362,7 +7455,7 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) { char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; + struct bpf_link *link; int prog_fd, pfd; prog_fd = bpf_program__fd(prog); @@ -7375,7 +7468,7 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.detach = &bpf_link__detach_fd; + link->detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(NULL, prog_fd); if (pfd < 0) { @@ -7409,10 +7502,9 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog) static int bpf_link__detach_struct_ops(struct bpf_link *link) { - struct bpf_link_fd *l = (void *)link; __u32 zero = 0; - if (bpf_map_delete_elem(l->fd, &zero)) + if (bpf_map_delete_elem(link->fd, &zero)) return -errno; return 0; @@ -7421,7 +7513,7 @@ static int bpf_link__detach_struct_ops(struct bpf_link *link) struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) { struct bpf_struct_ops *st_ops; - struct bpf_link_fd *link; + struct bpf_link *link; __u32 i, zero = 0; int err; @@ -7453,10 +7545,10 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) return ERR_PTR(err); } - link->link.detach = bpf_link__detach_struct_ops; + link->detach = bpf_link__detach_struct_ops; link->fd = map->fd; - return (struct bpf_link *)link; + return link; } enum bpf_perf_event_ret diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 02fc58a21a7f..d38d7a629417 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -219,6 +219,11 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_link; +LIBBPF_API struct bpf_link *bpf_link__open(const char *path); +LIBBPF_API int bpf_link__fd(const struct bpf_link *link); +LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link); +LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); +LIBBPF_API int bpf_link__unpin(struct bpf_link *link); LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 7b014c8cdece..5129283c0284 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -238,5 +238,10 @@ LIBBPF_0.0.7 { LIBBPF_0.0.8 { global: + bpf_link__fd; + bpf_link__open; + bpf_link__pin; + bpf_link__pin_path; + bpf_link__unpin; bpf_program__set_attach_target; } LIBBPF_0.0.7; diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index c4dd23c4b478..8ead55593984 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -239,7 +239,6 @@ buildid.*:: set buildid.dir to /dev/null. The default is $HOME/.debug annotate.*:: - These options work only for TUI. These are in control of addresses, jump function, source code in lines of assembly code from a specific program. @@ -269,6 +268,8 @@ annotate.*:: │ mov (%rdi),%rdx │ return n; + This option works with tui, stdio2 browsers. + annotate.use_offset:: Basing on a first address of a loaded function, offset can be used. Instead of using original addresses of assembly code, @@ -287,6 +288,8 @@ annotate.*:: 368:│ mov 0x8(%r14),%rdi + This option works with tui, stdio2 browsers. + annotate.jump_arrows:: There can be jump instruction among assembly code. Depending on a boolean value of jump_arrows, @@ -306,6 +309,8 @@ annotate.*:: │1330: mov %r15,%r10 │1333: cmp %r15,%r14 + This option works with tui browser. + annotate.show_linenr:: When showing source code if this option is 'true', line numbers are printed as below. @@ -325,6 +330,8 @@ annotate.*:: │ array++; │ } + This option works with tui, stdio2 browsers. + annotate.show_nr_jumps:: Let's see a part of assembly code. @@ -335,6 +342,8 @@ annotate.*:: │1 1382: movb $0x1,-0x270(%rbp) + This option works with tui, stdio2 browsers. + annotate.show_total_period:: To compare two records on an instruction base, with this option provided, display total number of samples that belong to a line @@ -348,11 +357,30 @@ annotate.*:: 99.93 │ mov %eax,%eax + This option works with tui, stdio2, stdio browsers. + + annotate.show_nr_samples:: + By default perf annotate shows percentage of samples. This option + can be used to print absolute number of samples. Ex, when set as + false: + + Percent│ + 74.03 │ mov %fs:0x28,%rax + + When set as true: + + Samples│ + 6 │ mov %fs:0x28,%rax + + This option works with tui, stdio2, stdio browsers. + annotate.offset_level:: Default is '1', meaning just jump targets will have offsets show right beside the instruction. When set to '2' 'call' instructions will also have its offsets shown, 3 or higher will show offsets for all instructions. + This option works with tui, stdio2 browsers. + hist.*:: hist.percentage:: This option control the way to calculate overhead of filtered entries - @@ -490,6 +518,12 @@ top.*:: column by default. The default is 'true'. + top.call-graph:: + This is identical to 'call-graph.record-mode', except it is + applicable only for 'top' subcommand. This option ONLY setup + the unwind method. To enable 'perf top' to actually use it, + the command line option -g must be specified. + man.*:: man.viewer:: This option can assign a tool to view manual pages when 'help' @@ -517,6 +551,16 @@ record.*:: But if this option is 'no-cache', it will not update the build-id cache. 'skip' skips post-processing and does not update the cache. + record.call-graph:: + This is identical to 'call-graph.record-mode', except it is + applicable only for 'record' subcommand. This option ONLY setup + the unwind method. To enable 'perf record' to actually use it, + the command line option -g must be specified. + + record.aio:: + Use 'n' control blocks in asynchronous (Posix AIO) trace writing + mode ('n' default: 1, max: 4). + diff.*:: diff.order:: This option sets the number of columns to sort the result. @@ -566,6 +610,11 @@ trace.*:: "libbeauty", the default, to use the same argument beautifiers used in the strace-like sys_enter+sys_exit lines. +ftrace.*:: + ftrace.tracer:: + Can be used to select the default tracer. Possible values are + 'function' and 'function_graph'. + llvm.*:: llvm.clang-path:: Path to clang. If omit, search it from $PATH. @@ -610,6 +659,29 @@ scripts.*:: The script gets the same options passed as a full perf script, in particular -i perfdata file, --cpu, --tid +convert.*:: + + convert.queue-size:: + Limit the size of ordered_events queue, so we could control + allocation size of perf data files without proper finished + round events. + +intel-pt.*:: + + intel-pt.cache-divisor:: + + intel-pt.mispred-all:: + If set, Intel PT decoder will set the mispred flag on all + branches. + +auxtrace.*:: + + auxtrace.dumpdir:: + s390 only. The directory to save the auxiliary trace buffer + can be changed using this option. Ex, auxtrace.dumpdir=/tmp. + If the directory does not exist or has the wrong file type, + the current directory is used. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 2898cfdf8fe1..941f814820b8 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -858,21 +858,6 @@ static void cs_etm_recording_free(struct auxtrace_record *itr) free(ptr); } -static int cs_etm_read_finish(struct auxtrace_record *itr, int idx) -{ - struct cs_etm_recording *ptr = - container_of(itr, struct cs_etm_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(ptr->evlist, evsel) { - if (evsel->core.attr.type == ptr->cs_etm_pmu->type) - return perf_evlist__enable_event_idx(ptr->evlist, - evsel, idx); - } - - return -EINVAL; -} - struct auxtrace_record *cs_etm_record_init(int *err) { struct perf_pmu *cs_etm_pmu; @@ -892,6 +877,7 @@ struct auxtrace_record *cs_etm_record_init(int *err) } ptr->cs_etm_pmu = cs_etm_pmu; + ptr->itr.pmu = cs_etm_pmu; ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options; ptr->itr.recording_options = cs_etm_recording_options; ptr->itr.info_priv_size = cs_etm_info_priv_size; @@ -901,7 +887,7 @@ struct auxtrace_record *cs_etm_record_init(int *err) ptr->itr.snapshot_finish = cs_etm_snapshot_finish; ptr->itr.reference = cs_etm_reference; ptr->itr.free = cs_etm_recording_free; - ptr->itr.read_finish = cs_etm_read_finish; + ptr->itr.read_finish = auxtrace_record__read_finish; *err = 0; return &ptr->itr; diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index eba6541ec0f1..8d6821d9c3f6 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -158,20 +158,6 @@ static void arm_spe_recording_free(struct auxtrace_record *itr) free(sper); } -static int arm_spe_read_finish(struct auxtrace_record *itr, int idx) -{ - struct arm_spe_recording *sper = - container_of(itr, struct arm_spe_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(sper->evlist, evsel) { - if (evsel->core.attr.type == sper->arm_spe_pmu->type) - return perf_evlist__enable_event_idx(sper->evlist, - evsel, idx); - } - return -EINVAL; -} - struct auxtrace_record *arm_spe_recording_init(int *err, struct perf_pmu *arm_spe_pmu) { @@ -189,12 +175,13 @@ struct auxtrace_record *arm_spe_recording_init(int *err, } sper->arm_spe_pmu = arm_spe_pmu; + sper->itr.pmu = arm_spe_pmu; sper->itr.recording_options = arm_spe_recording_options; sper->itr.info_priv_size = arm_spe_info_priv_size; sper->itr.info_fill = arm_spe_info_fill; sper->itr.free = arm_spe_recording_free; sper->itr.reference = arm_spe_reference; - sper->itr.read_finish = arm_spe_read_finish; + sper->itr.read_finish = auxtrace_record__read_finish; sper->itr.alignment = 0; *err = 0; diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 43f736ed47f2..35b61bfc1b1a 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -517,3 +517,5 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open 435 nospu clone3 ppc_clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 27d9e214d068..26cee1052179 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -413,20 +413,6 @@ out_err: return err; } -static int intel_bts_read_finish(struct auxtrace_record *itr, int idx) -{ - struct intel_bts_recording *btsr = - container_of(itr, struct intel_bts_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(btsr->evlist, evsel) { - if (evsel->core.attr.type == btsr->intel_bts_pmu->type) - return perf_evlist__enable_event_idx(btsr->evlist, - evsel, idx); - } - return -EINVAL; -} - struct auxtrace_record *intel_bts_recording_init(int *err) { struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); @@ -447,6 +433,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err) } btsr->intel_bts_pmu = intel_bts_pmu; + btsr->itr.pmu = intel_bts_pmu; btsr->itr.recording_options = intel_bts_recording_options; btsr->itr.info_priv_size = intel_bts_info_priv_size; btsr->itr.info_fill = intel_bts_info_fill; @@ -456,7 +443,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err) btsr->itr.find_snapshot = intel_bts_find_snapshot; btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options; btsr->itr.reference = intel_bts_reference; - btsr->itr.read_finish = intel_bts_read_finish; + btsr->itr.read_finish = auxtrace_record__read_finish; btsr->itr.alignment = sizeof(struct branch); return &btsr->itr; } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 20df442fdf36..7eea4fd7ce58 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -1166,20 +1166,6 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) return rdtsc(); } -static int intel_pt_read_finish(struct auxtrace_record *itr, int idx) -{ - struct intel_pt_recording *ptr = - container_of(itr, struct intel_pt_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(ptr->evlist, evsel) { - if (evsel->core.attr.type == ptr->intel_pt_pmu->type) - return perf_evlist__enable_event_idx(ptr->evlist, evsel, - idx); - } - return -EINVAL; -} - struct auxtrace_record *intel_pt_recording_init(int *err) { struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); @@ -1200,6 +1186,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err) } ptr->intel_pt_pmu = intel_pt_pmu; + ptr->itr.pmu = intel_pt_pmu; ptr->itr.recording_options = intel_pt_recording_options; ptr->itr.info_priv_size = intel_pt_info_priv_size; ptr->itr.info_fill = intel_pt_info_fill; @@ -1209,7 +1196,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err) ptr->itr.find_snapshot = intel_pt_find_snapshot; ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; ptr->itr.reference = intel_pt_reference; - ptr->itr.read_finish = intel_pt_read_finish; + ptr->itr.read_finish = auxtrace_record__read_finish; /* * Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K * should give at least 1 PSB per sample. diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index ff61795a4d13..6c0a0412502e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -566,6 +566,8 @@ int cmd_annotate(int argc, const char **argv) if (ret < 0) return ret; + annotation_config__init(&annotate.opts); + argc = parse_options(argc, argv, options, annotate_usage, 0); if (argc) { /* @@ -605,8 +607,6 @@ int cmd_annotate(int argc, const char **argv) if (ret < 0) goto out_delete; - annotation_config__init(); - symbol_conf.try_vmlinux_path = true; ret = symbol__init(&annotate.session->header.env); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 26bc5923e6b5..70548df2abb9 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -449,7 +449,8 @@ static int perf_del_probe_events(struct strfilter *filter) ret = probe_file__del_strlist(kfd, klist); if (ret < 0) goto error; - } + } else if (ret == -ENOMEM) + goto error; ret2 = probe_file__get_events(ufd, filter, ulist); if (ret2 == 0) { @@ -459,7 +460,8 @@ static int perf_del_probe_events(struct strfilter *filter) ret2 = probe_file__del_strlist(ufd, ulist); if (ret2 < 0) goto error; - } + } else if (ret2 == -ENOMEM) + goto error; if (ret == -ENOENT && ret2 == -ENOENT) pr_warning("\"%s\" does not hit any event.\n", str); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9483b3f0cae3..72a12b69f120 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1507,7 +1507,7 @@ repeat: symbol_conf.priv_size += sizeof(u32); symbol_conf.sort_by_name = true; } - annotation_config__init(); + annotation_config__init(&report.annotation_opts); } if (symbol__init(&session->header.env) < 0) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 8affcab75604..f6dd1a63f159 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -143,7 +143,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(&he->ms, evsel, 0, &top->annotation_opts, NULL); + err = symbol__annotate(&he->ms, evsel, &top->annotation_opts, NULL); if (err == 0) { top->sym_filter_entry = he; } else { @@ -1683,7 +1683,7 @@ int cmd_top(int argc, const char **argv) if (status < 0) goto out_delete_evlist; - annotation_config__init(); + annotation_config__init(&top.annotation_opts); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); status = symbol__init(NULL); diff --git a/tools/perf/include/bpf/pid_filter.h b/tools/perf/include/bpf/pid_filter.h index 607189a315b2..6e61c4bdf548 100644 --- a/tools/perf/include/bpf/pid_filter.h +++ b/tools/perf/include/bpf/pid_filter.h @@ -3,7 +3,7 @@ #ifndef _PERF_BPF_PID_FILTER_ #define _PERF_BPF_PID_FILTER_ -#include <bpf/bpf.h> +#include <bpf.h> #define pid_filter(name) pid_map(name, bool) diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h index 7ca6fa5463ee..316af5b2ff35 100644 --- a/tools/perf/include/bpf/stdio.h +++ b/tools/perf/include/bpf/stdio.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include <bpf/bpf.h> +#include <bpf.h> struct bpf_map SEC("maps") __bpf_stdout__ = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, diff --git a/tools/perf/include/bpf/unistd.h b/tools/perf/include/bpf/unistd.h index d1a35b6c649d..ca7877f9a976 100644 --- a/tools/perf/include/bpf/unistd.h +++ b/tools/perf/include/bpf/unistd.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 -#include <bpf/bpf.h> +#include <bpf.h> static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid; diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index 7cb99b433888..c2cc42daf924 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -14,7 +14,7 @@ add_probe_vfs_getname() { if [ $had_vfs_getname -eq 1 ] ; then line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/') perf probe -q "vfs_getname=getname_flags:${line} pathname=result->name:string" || \ - perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:string" + perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring" fi } diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index badbddbb30f8..9023267e5643 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -754,10 +754,9 @@ static int annotate_browser__run(struct annotate_browser *browser, "? Search string backwards\n"); continue; case 'r': - { - script_browse(NULL, NULL); - continue; - } + script_browse(NULL, NULL); + annotate_browser__show(&browser->b, title, help); + continue; case 'k': notes->options->show_linenr = !notes->options->show_linenr; break; @@ -834,13 +833,13 @@ show_sup_ins: map_symbol__annotation_dump(ms, evsel, browser->opts); continue; case 't': - if (notes->options->show_total_period) { - notes->options->show_total_period = false; - notes->options->show_nr_samples = true; - } else if (notes->options->show_nr_samples) - notes->options->show_nr_samples = false; + if (symbol_conf.show_total_period) { + symbol_conf.show_total_period = false; + symbol_conf.show_nr_samples = true; + } else if (symbol_conf.show_nr_samples) + symbol_conf.show_nr_samples = false; else - notes->options->show_total_period = true; + symbol_conf.show_total_period = true; annotation__update_column_widths(notes); continue; case 'c': diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 22cc240f7371..35f9641bf670 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -174,7 +174,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, if (ms->map->dso->annotate_warned) return -1; - err = symbol__annotate(ms, evsel, 0, &annotation__default_options, NULL); + err = symbol__annotate(ms, evsel, &annotation__default_options, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ca73fb74ad03..0ea95be84b3b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1143,93 +1143,70 @@ out: } struct annotate_args { - size_t privsize; - struct arch *arch; - struct map_symbol ms; - struct evsel *evsel; + struct arch *arch; + struct map_symbol ms; + struct evsel *evsel; struct annotation_options *options; - s64 offset; - char *line; - int line_nr; + s64 offset; + char *line; + int line_nr; }; -static void annotation_line__delete(struct annotation_line *al) +static void annotation_line__init(struct annotation_line *al, + struct annotate_args *args, + int nr) { - void *ptr = (void *) al - al->privsize; + al->offset = args->offset; + al->line = strdup(args->line); + al->line_nr = args->line_nr; + al->data_nr = nr; +} +static void annotation_line__exit(struct annotation_line *al) +{ free_srcline(al->path); zfree(&al->line); - free(ptr); } -/* - * Allocating the annotation line data with following - * structure: - * - * -------------------------------------- - * private space | struct annotation_line - * -------------------------------------- - * - * Size of the private space is stored in 'struct annotation_line'. - * - */ -static struct annotation_line * -annotation_line__new(struct annotate_args *args, size_t privsize) +static size_t disasm_line_size(int nr) { struct annotation_line *al; - struct evsel *evsel = args->evsel; - size_t size = privsize + sizeof(*al); - int nr = 1; - - if (perf_evsel__is_group_event(evsel)) - nr = evsel->core.nr_members; - size += sizeof(al->data[0]) * nr; - - al = zalloc(size); - if (al) { - al = (void *) al + privsize; - al->privsize = privsize; - al->offset = args->offset; - al->line = strdup(args->line); - al->line_nr = args->line_nr; - al->data_nr = nr; - } - - return al; + return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); } /* * Allocating the disasm annotation line data with * following structure: * - * ------------------------------------------------------------ - * privsize space | struct disasm_line | struct annotation_line - * ------------------------------------------------------------ + * ------------------------------------------- + * struct disasm_line | struct annotation_line + * ------------------------------------------- * * We have 'struct annotation_line' member as last member * of 'struct disasm_line' to have an easy access. - * */ static struct disasm_line *disasm_line__new(struct annotate_args *args) { struct disasm_line *dl = NULL; - struct annotation_line *al; - size_t privsize = args->privsize + offsetof(struct disasm_line, al); + int nr = 1; - al = annotation_line__new(args, privsize); - if (al != NULL) { - dl = disasm_line(al); + if (perf_evsel__is_group_event(args->evsel)) + nr = args->evsel->core.nr_members; - if (dl->al.line == NULL) - goto out_delete; + dl = zalloc(disasm_line_size(nr)); + if (!dl) + return NULL; - if (args->offset != -1) { - if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) - goto out_free_line; + annotation_line__init(&dl->al, args, nr); + if (dl->al.line == NULL) + goto out_delete; - disasm_line__init_ins(dl, args->arch, &args->ms); - } + if (args->offset != -1) { + if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) + goto out_free_line; + + disasm_line__init_ins(dl, args->arch, &args->ms); } return dl; @@ -1248,7 +1225,8 @@ void disasm_line__free(struct disasm_line *dl) else ins__delete(&dl->ops); zfree(&dl->ins.name); - annotation_line__delete(&dl->al); + annotation_line__exit(&dl->al); + free(dl); } int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name) @@ -2149,13 +2127,12 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel) annotation__calc_percent(notes, evsel, symbol__size(sym)); } -int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, size_t privsize, +int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *options, struct arch **parch) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); struct annotate_args args = { - .privsize = privsize, .evsel = evsel, .options = options, }; @@ -2644,6 +2621,8 @@ void annotation__set_offsets(struct annotation *notes, s64 size) struct annotation_line *al; notes->max_line_len = 0; + notes->nr_entries = 0; + notes->nr_asm_entries = 0; list_for_each_entry(al, ¬es->src->source, node) { size_t line_len = strlen(al->line); @@ -2790,7 +2769,7 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; - if (symbol__annotate(ms, evsel, 0, opts, NULL) < 0) + if (symbol__annotate(ms, evsel, opts, NULL) < 0) return -1; symbol__calc_percent(sym, evsel); @@ -2915,9 +2894,9 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati percent = annotation_data__percent(&al->data[i], percent_type); obj__set_percent_color(obj, percent, current_entry); - if (notes->options->show_total_period) { + if (symbol_conf.show_total_period) { obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period); - } else if (notes->options->show_nr_samples) { + } else if (symbol_conf.show_nr_samples) { obj__printf(obj, "%6" PRIu64 " ", al->data[i].he.nr_samples); } else { @@ -2931,8 +2910,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati obj__printf(obj, "%-*s", pcnt_width, " "); else { obj__printf(obj, "%-*s", pcnt_width, - notes->options->show_total_period ? "Period" : - notes->options->show_nr_samples ? "Samples" : "Percent"); + symbol_conf.show_total_period ? "Period" : + symbol_conf.show_nr_samples ? "Samples" : "Percent"); } } @@ -3070,7 +3049,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->core.nr_members; - err = symbol__annotate(ms, evsel, 0, options, parch); + err = symbol__annotate(ms, evsel, options, parch); if (err) goto out_free_offsets; @@ -3094,69 +3073,46 @@ out_free_offsets: return err; } -#define ANNOTATION__CFG(n) \ - { .name = #n, .value = &annotation__default_options.n, } - -/* - * Keep the entries sorted, they are bsearch'ed - */ -static struct annotation_config { - const char *name; - void *value; -} annotation__configs[] = { - ANNOTATION__CFG(hide_src_code), - ANNOTATION__CFG(jump_arrows), - ANNOTATION__CFG(offset_level), - ANNOTATION__CFG(show_linenr), - ANNOTATION__CFG(show_nr_jumps), - ANNOTATION__CFG(show_nr_samples), - ANNOTATION__CFG(show_total_period), - ANNOTATION__CFG(use_offset), -}; - -#undef ANNOTATION__CFG - -static int annotation_config__cmp(const void *name, const void *cfgp) -{ - const struct annotation_config *cfg = cfgp; - - return strcmp(name, cfg->name); -} - -static int annotation__config(const char *var, const char *value, - void *data __maybe_unused) +static int annotation__config(const char *var, const char *value, void *data) { - struct annotation_config *cfg; - const char *name; + struct annotation_options *opt = data; if (!strstarts(var, "annotate.")) return 0; - name = var + 9; - cfg = bsearch(name, annotation__configs, ARRAY_SIZE(annotation__configs), - sizeof(struct annotation_config), annotation_config__cmp); - - if (cfg == NULL) - pr_debug("%s variable unknown, ignoring...", var); - else if (strcmp(var, "annotate.offset_level") == 0) { - perf_config_int(cfg->value, name, value); - - if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL) - *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL; - else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL) - *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL; + if (!strcmp(var, "annotate.offset_level")) { + perf_config_u8(&opt->offset_level, "offset_level", value); + + if (opt->offset_level > ANNOTATION__MAX_OFFSET_LEVEL) + opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL; + else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL) + opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + } else if (!strcmp(var, "annotate.hide_src_code")) { + opt->hide_src_code = perf_config_bool("hide_src_code", value); + } else if (!strcmp(var, "annotate.jump_arrows")) { + opt->jump_arrows = perf_config_bool("jump_arrows", value); + } else if (!strcmp(var, "annotate.show_linenr")) { + opt->show_linenr = perf_config_bool("show_linenr", value); + } else if (!strcmp(var, "annotate.show_nr_jumps")) { + opt->show_nr_jumps = perf_config_bool("show_nr_jumps", value); + } else if (!strcmp(var, "annotate.show_nr_samples")) { + symbol_conf.show_nr_samples = perf_config_bool("show_nr_samples", + value); + } else if (!strcmp(var, "annotate.show_total_period")) { + symbol_conf.show_total_period = perf_config_bool("show_total_period", + value); + } else if (!strcmp(var, "annotate.use_offset")) { + opt->use_offset = perf_config_bool("use_offset", value); } else { - *(bool *)cfg->value = perf_config_bool(name, value); + pr_debug("%s variable unknown, ignoring...", var); } + return 0; } -void annotation_config__init(void) +void annotation_config__init(struct annotation_options *opt) { - perf_config(annotation__config, NULL); - - annotation__default_options.show_total_period = symbol_conf.show_total_period; - annotation__default_options.show_nr_samples = symbol_conf.show_nr_samples; + perf_config(annotation__config, opt); } static unsigned int parse_percent_type(char *str1, char *str2) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 455403e8fede..001258601a37 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -83,8 +83,6 @@ struct annotation_options { full_path, show_linenr, show_nr_jumps, - show_nr_samples, - show_total_period, show_minmax_cycle, show_asm_raw, annotate_src; @@ -141,7 +139,6 @@ struct annotation_line { u64 cycles; u64 cycles_max; u64 cycles_min; - size_t privsize; char *path; u32 idx; int idx_asm; @@ -309,7 +306,7 @@ static inline int annotation__cycles_width(struct annotation *notes) static inline int annotation__pcnt_width(struct annotation *notes) { - return (notes->options->show_total_period ? 12 : 7) * notes->nr_events; + return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events; } static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes) @@ -352,7 +349,7 @@ struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists); void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct map_symbol *ms, - struct evsel *evsel, size_t privsize, + struct evsel *evsel, struct annotation_options *options, struct arch **parch); int symbol__annotate2(struct map_symbol *ms, @@ -413,7 +410,7 @@ static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused, } #endif -void annotation_config__init(void); +void annotation_config__init(struct annotation_options *opt); int annotate_parse_percent_type(const struct option *opt, const char *_str, int unset); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index eb087e7df6f4..3571ce72ca28 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -629,8 +629,10 @@ int auxtrace_record__options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts) { - if (itr) + if (itr) { + itr->evlist = evlist; return itr->recording_options(itr, evlist, opts); + } return 0; } @@ -664,6 +666,24 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, return -EINVAL; } +int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx) +{ + struct evsel *evsel; + + if (!itr->evlist || !itr->pmu) + return -EINVAL; + + evlist__for_each_entry(itr->evlist, evsel) { + if (evsel->core.attr.type == itr->pmu->type) { + if (evsel->disabled) + return 0; + return perf_evlist__enable_event_idx(itr->evlist, evsel, + idx); + } + } + return -EINVAL; +} + /* * Event record size is 16-bit which results in a maximum size of about 64KiB. * Allow about 4KiB for the rest of the sample record, to give a maximum diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 749d72cd9c7b..e58ef160b599 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -29,6 +29,7 @@ struct record_opts; struct perf_record_auxtrace_error; struct perf_record_auxtrace_info; struct events_stats; +struct perf_pmu; enum auxtrace_error_type { PERF_AUXTRACE_ERROR_ITRACE = 1, @@ -322,6 +323,8 @@ struct auxtrace_mmap_params { * @read_finish: called after reading from an auxtrace mmap * @alignment: alignment (if any) for AUX area data * @default_aux_sample_size: default sample size for --aux sample option + * @pmu: associated pmu + * @evlist: selected events list */ struct auxtrace_record { int (*recording_options)(struct auxtrace_record *itr, @@ -346,6 +349,8 @@ struct auxtrace_record { int (*read_finish)(struct auxtrace_record *itr, int idx); unsigned int alignment; unsigned int default_aux_sample_size; + struct perf_pmu *pmu; + struct evlist *evlist; }; /** @@ -537,6 +542,7 @@ int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx, struct auxtrace_mmap *mm, unsigned char *data, u64 *head, u64 *old); u64 auxtrace_record__reference(struct auxtrace_record *itr); +int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx); int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event, off_t file_offset); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 0bc9c4d7fdc5..ef38eba56ed0 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -374,6 +374,18 @@ int perf_config_int(int *dest, const char *name, const char *value) return 0; } +int perf_config_u8(u8 *dest, const char *name, const char *value) +{ + long ret = 0; + + if (!perf_parse_long(value, &ret)) { + bad_config(name); + return -1; + } + *dest = ret; + return 0; +} + static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool) { int ret; diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index bd0a5897c76a..c10b66dde2f3 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -29,6 +29,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); int perf_config_int(int *dest, const char *, const char *); +int perf_config_u8(u8 *dest, const char *name, const char *value); int perf_config_u64(u64 *dest, const char *, const char *); int perf_config_bool(const char *, const char *); int config_error_nonbool(const char *); diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 5003ba403345..0f5fda11675f 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -301,10 +301,15 @@ int probe_file__get_events(int fd, struct strfilter *filter, p = strchr(ent->s, ':'); if ((p && strfilter__compare(filter, p + 1)) || strfilter__compare(filter, ent->s)) { - strlist__add(plist, ent->s); + ret = strlist__add(plist, ent->s); + if (ret == -ENOMEM) { + pr_err("strlist__add failed with -ENOMEM\n"); + goto out; + } ret = 0; } } +out: strlist__delete(namelist); return ret; @@ -511,7 +516,11 @@ static int probe_cache__load(struct probe_cache *pcache) ret = -EINVAL; goto out; } - strlist__add(entry->tevlist, buf); + ret = strlist__add(entry->tevlist, buf); + if (ret == -ENOMEM) { + pr_err("strlist__add failed with -ENOMEM\n"); + goto out; + } } } out: @@ -672,7 +681,12 @@ int probe_cache__add_entry(struct probe_cache *pcache, command = synthesize_probe_trace_command(&tevs[i]); if (!command) goto out_err; - strlist__add(entry->tevlist, command); + ret = strlist__add(entry->tevlist, command); + if (ret == -ENOMEM) { + pr_err("strlist__add failed with -ENOMEM\n"); + goto out_err; + } + free(command); } list_add_tail(&entry->node, &pcache->entries); @@ -853,9 +867,15 @@ int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname) break; } - strlist__add(entry->tevlist, buf); + ret = strlist__add(entry->tevlist, buf); + free(buf); entry = NULL; + + if (ret == -ENOMEM) { + pr_err("strlist__add failed with -ENOMEM\n"); + break; + } } if (entry) { list_del_init(&entry->node); diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index ded7a950dc40..59f31f01cb93 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -106,6 +106,7 @@ ifneq ($(silent),1) ifneq ($(V),1) QUIET_CC = @echo ' CC '$@; QUIET_CC_FPIC = @echo ' CC FPIC '$@; + QUIET_CLANG = @echo ' CLANG '$@; QUIET_AR = @echo ' AR '$@; QUIET_LINK = @echo ' LINK '$@; QUIET_MKDIR = @echo ' MKDIR '$@; diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 220d04f958a6..7570e36d636d 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -30,7 +30,7 @@ my %default = ( "EMAIL_WHEN_STARTED" => 0, "NUM_TESTS" => 1, "TEST_TYPE" => "build", - "BUILD_TYPE" => "randconfig", + "BUILD_TYPE" => "oldconfig", "MAKE_CMD" => "make", "CLOSE_CONSOLE_SIGNAL" => "INT", "TIMEOUT" => 120, @@ -1030,7 +1030,7 @@ sub __read_config { } if (!$skip && $rest !~ /^\s*$/) { - die "$name: $.: Gargbage found after $type\n$_"; + die "$name: $.: Garbage found after $type\n$_"; } if ($skip && $type eq "TEST_START") { @@ -1063,7 +1063,7 @@ sub __read_config { } if ($rest !~ /^\s*$/) { - die "$name: $.: Gargbage found after DEFAULTS\n$_"; + die "$name: $.: Garbage found after DEFAULTS\n$_"; } } elsif (/^\s*INCLUDE\s+(\S+)/) { @@ -1154,7 +1154,7 @@ sub __read_config { # on of these sections that have SKIP defined. # The save variable can be # defined multiple times and the new one simply overrides - # the prevous one. + # the previous one. set_variable($lvalue, $rvalue); } else { @@ -1234,7 +1234,7 @@ sub read_config { foreach my $option (keys %not_used) { print "$option\n"; } - print "Set IGRNORE_UNUSED = 1 to have ktest ignore unused variables\n"; + print "Set IGNORE_UNUSED = 1 to have ktest ignore unused variables\n"; if (!read_yn "Do you want to continue?") { exit -1; } @@ -1345,7 +1345,7 @@ sub eval_option { # Check for recursive evaluations. # 100 deep should be more than enough. if ($r++ > 100) { - die "Over 100 evaluations accurred with $option\n" . + die "Over 100 evaluations occurred with $option\n" . "Check for recursive variables\n"; } $prev = $option; @@ -1383,7 +1383,7 @@ sub reboot { } else { # Make sure everything has been written to disk - run_ssh("sync"); + run_ssh("sync", 10); if (defined($time)) { start_monitor; @@ -1461,7 +1461,7 @@ sub get_test_name() { sub dodie { - # avoid recusion + # avoid recursion return if ($in_die); $in_die = 1; diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index c3bc933d437b..27666b8007ed 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -10,7 +10,7 @@ # # Options set in the beginning of the file are considered to be -# default options. These options can be overriden by test specific +# default options. These options can be overridden by test specific # options, with the following exceptions: # # LOG_FILE @@ -204,7 +204,7 @@ # # This config file can also contain "config variables". # These are assigned with ":=" instead of the ktest option -# assigment "=". +# assignment "=". # # The difference between ktest options and config variables # is that config variables can be used multiple times, @@ -263,7 +263,7 @@ #### Using options in other options #### # # Options that are defined in the config file may also be used -# by other options. All options are evaulated at time of +# by other options. All options are evaluated at time of # use (except that config variables are evaluated at config # processing time). # @@ -505,7 +505,7 @@ #TEST = ssh user@machine /root/run_test # The build type is any make config type or special command -# (default randconfig) +# (default oldconfig) # nobuild - skip the clean and build step # useconfig:/path/to/config - use the given config and run # oldconfig on it. @@ -707,7 +707,7 @@ # Line to define a successful boot up in console output. # This is what the line contains, not the entire line. If you need -# the entire line to match, then use regural expression syntax like: +# the entire line to match, then use regular expression syntax like: # (do not add any quotes around it) # # SUCCESS_LINE = ^MyBox Login:$ @@ -839,7 +839,7 @@ # (ignored if POWEROFF_ON_SUCCESS is set) #REBOOT_ON_SUCCESS = 1 -# In case there are isses with rebooting, you can specify this +# In case there are issues with rebooting, you can specify this # to always powercycle after this amount of time after calling # reboot. # Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just @@ -848,7 +848,7 @@ # (default undefined) #POWERCYCLE_AFTER_REBOOT = 5 -# In case there's isses with halting, you can specify this +# In case there's issues with halting, you can specify this # to always poweroff after this amount of time after calling # halt. # Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just @@ -972,7 +972,7 @@ # # PATCHCHECK_START is required and is the first patch to # test (the SHA1 of the commit). You may also specify anything -# that git checkout allows (branch name, tage, HEAD~3). +# that git checkout allows (branch name, tag, HEAD~3). # # PATCHCHECK_END is the last patch to check (default HEAD) # @@ -994,7 +994,7 @@ # IGNORE_WARNINGS is set for the given commit's sha1 # # IGNORE_WARNINGS can be used to disable the failure of patchcheck -# on a particuler commit (SHA1). You can add more than one commit +# on a particular commit (SHA1). You can add more than one commit # by adding a list of SHA1s that are space delimited. # # If BUILD_NOCLEAN is set, then make mrproper will not be run on @@ -1093,7 +1093,7 @@ # whatever reason. (Can't reboot, want to inspect each iteration) # Doing a BISECT_MANUAL will have the test wait for you to # tell it if the test passed or failed after each iteration. -# This is basicall the same as running git bisect yourself +# This is basically the same as running git bisect yourself # but ktest will rebuild and install the kernel for you. # # BISECT_CHECK = 1 (optional, default 0) @@ -1239,7 +1239,7 @@ # # CONFIG_BISECT_EXEC (optional) # The config bisect is a separate program that comes with ktest.pl. -# By befault, it will look for: +# By default, it will look for: # `pwd`/config-bisect.pl # the location ktest.pl was executed from. # If it does not find it there, it will look for: # `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index ec464859c6b6..2198cd876675 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -31,6 +31,7 @@ test_tcp_check_syncookie_user test_sysctl test_hashmap test_btf_dump +test_current_pid_tgid_new_ns xdping test_cpp *.skel.h diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2d7f5df33f04..7729892e0b04 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -20,8 +20,9 @@ CLANG ?= clang LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) -CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \ +CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) \ -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \ + -I$(APIDIR) \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lz -lrt -lpthread @@ -32,7 +33,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ - test_progs-no_alu32 + test_progs-no_alu32 \ + test_current_pid_tgid_new_ns # Also test bpf-gcc, if present ifneq ($(BPF_GCC),) @@ -129,10 +131,13 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) $(CC) -c $(CFLAGS) -o $@ $< -VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux) \ - /sys/kernel/btf/vmlinux \ - /boot/vmlinux-$(shell uname -r) -VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS))) +VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) + $(OUTPUT)/runqslower: $(BPFOBJ) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ @@ -172,6 +177,10 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR): $(call msg,MKDIR,,$@) mkdir -p $@ +$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) + $(call msg,GEN,,$@) + $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ + # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. @@ -190,8 +199,8 @@ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ - -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi \ - -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include) + -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ + -I$(abspath $(OUTPUT)/../usr/include) CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ -Wno-compare-distinct-pointer-types @@ -280,6 +289,7 @@ $(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/%.c \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ + $$(INCLUDE_DIR)/vmlinux.h \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS), \ diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index 8f21965ffc6c..5bf2fe9b1efa 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -6,7 +6,7 @@ #include <linux/types.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> #define BPF_STRUCT_OPS(name, args...) \ SEC("struct_ops/"#name) \ diff --git a/tools/testing/selftests/bpf/bpf_trace_helpers.h b/tools/testing/selftests/bpf/bpf_trace_helpers.h deleted file mode 100644 index c6f1354d93fb..000000000000 --- a/tools/testing/selftests/bpf/bpf_trace_helpers.h +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __BPF_TRACE_HELPERS_H -#define __BPF_TRACE_HELPERS_H - -#include <bpf/bpf_helpers.h> - -#define ___bpf_concat(a, b) a ## b -#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) -#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N -#define ___bpf_narg(...) \ - ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) -#define ___bpf_empty(...) \ - ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) - -#define ___bpf_ctx_cast0() ctx -#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] -#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] -#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] -#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] -#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] -#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] -#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] -#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] -#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] -#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] -#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] -#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] -#define ___bpf_ctx_cast(args...) \ - ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) - -/* - * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and - * similar kinds of BPF programs, that accept input arguments as a single - * pointer to untyped u64 array, where each u64 can actually be a typed - * pointer or integer of different size. Instead of requring user to write - * manual casts and work with array elements by index, BPF_PROG macro - * allows user to declare a list of named and typed input arguments in the - * same syntax as for normal C function. All the casting is hidden and - * performed transparently, while user code can just assume working with - * function arguments of specified type and name. - * - * Original raw context argument is preserved as well as 'ctx' argument. - * This is useful when using BPF helpers that expect original context - * as one of the parameters (e.g., for bpf_perf_event_output()). - */ -#define BPF_PROG(name, args...) \ -name(unsigned long long *ctx); \ -static __always_inline typeof(name(0)) \ -____##name(unsigned long long *ctx, ##args); \ -typeof(name(0)) name(unsigned long long *ctx) \ -{ \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_ctx_cast(args)); \ - _Pragma("GCC diagnostic pop") \ -} \ -static __always_inline typeof(name(0)) \ -____##name(unsigned long long *ctx, ##args) - -struct pt_regs; - -#define ___bpf_kprobe_args0() ctx -#define ___bpf_kprobe_args1(x) \ - ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) -#define ___bpf_kprobe_args2(x, args...) \ - ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) -#define ___bpf_kprobe_args3(x, args...) \ - ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) -#define ___bpf_kprobe_args4(x, args...) \ - ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) -#define ___bpf_kprobe_args5(x, args...) \ - ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) -#define ___bpf_kprobe_args(args...) \ - ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) - -/* - * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for - * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific - * low-level way of getting kprobe input arguments from struct pt_regs, and - * provides a familiar typed and named function arguments syntax and - * semantics of accessing kprobe input paremeters. - * - * Original struct pt_regs* context is preserved as 'ctx' argument. This might - * be necessary when using BPF helpers like bpf_perf_event_output(). - */ -#define BPF_KPROBE(name, args...) \ -name(struct pt_regs *ctx); \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ -typeof(name(0)) name(struct pt_regs *ctx) \ -{ \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_kprobe_args(args)); \ - _Pragma("GCC diagnostic pop") \ -} \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) - -#define ___bpf_kretprobe_args0() ctx -#define ___bpf_kretprobe_argsN(x, args...) \ - ___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx) -#define ___bpf_kretprobe_args(args...) \ - ___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args) - -/* - * BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all - * input kprobe arguments, one last extra argument has to be specified, which - * captures kprobe return value. - */ -#define BPF_KRETPROBE(name, args...) \ -name(struct pt_regs *ctx); \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ -typeof(name(0)) name(struct pt_regs *ctx) \ -{ \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_kretprobe_args(args)); \ - _Pragma("GCC diagnostic pop") \ -} \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) -#endif diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c index 5b13f2c6c402..70e94e783070 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c @@ -6,7 +6,7 @@ #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" -char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static char bpf_log_buf[BPF_LOG_BUF_SIZE]; static int prog_load(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index 2ff21dbce179..139f8e82c7c6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -6,7 +6,7 @@ #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" -char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static char bpf_log_buf[BPF_LOG_BUF_SIZE]; static int map_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c index 9d8cb48b99de..9e96f8d87fea 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c @@ -8,7 +8,7 @@ #define BAR "/foo/bar/" #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" -char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static char bpf_log_buf[BPF_LOG_BUF_SIZE]; static int prog_load(int verdict) { diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 235ac4f67f5b..83493bd5745c 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -1,22 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "test_pkt_access.skel.h" #include "fentry_test.skel.h" #include "fexit_test.skel.h" void test_fentry_fexit(void) { - struct test_pkt_access *pkt_skel = NULL; struct fentry_test *fentry_skel = NULL; struct fexit_test *fexit_skel = NULL; __u64 *fentry_res, *fexit_res; __u32 duration = 0, retval; - int err, pkt_fd, i; + int err, prog_fd, i; - pkt_skel = test_pkt_access__open_and_load(); - if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) - return; fentry_skel = fentry_test__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto close_prog; @@ -31,8 +26,8 @@ void test_fentry_fexit(void) if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto close_prog; - pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fexit_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", @@ -49,7 +44,6 @@ void test_fentry_fexit(void) } close_prog: - test_pkt_access__destroy(pkt_skel); fentry_test__destroy(fentry_skel); fexit_test__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 5cc06021f27d..04ebbf1cb390 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -1,20 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "test_pkt_access.skel.h" #include "fentry_test.skel.h" void test_fentry_test(void) { - struct test_pkt_access *pkt_skel = NULL; struct fentry_test *fentry_skel = NULL; - int err, pkt_fd, i; + int err, prog_fd, i; __u32 duration = 0, retval; __u64 *result; - pkt_skel = test_pkt_access__open_and_load(); - if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) - return; fentry_skel = fentry_test__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto cleanup; @@ -23,10 +18,10 @@ void test_fentry_test(void) if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) goto cleanup; - pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fentry_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", + CHECK(err || retval, "test_run", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); @@ -39,5 +34,4 @@ void test_fentry_test(void) cleanup: fentry_test__destroy(fentry_skel); - test_pkt_access__destroy(pkt_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index d2c3655dd7a3..78d7a2765c27 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -1,64 +1,37 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> +#include "fexit_test.skel.h" void test_fexit_test(void) { - struct bpf_prog_load_attr attr = { - .file = "./fexit_test.o", - }; - - char prog_name[] = "fexit/bpf_fentry_testX"; - struct bpf_object *obj = NULL, *pkt_obj; - int err, pkt_fd, kfree_skb_fd, i; - struct bpf_link *link[6] = {}; - struct bpf_program *prog[6]; + struct fexit_test *fexit_skel = NULL; + int err, prog_fd, i; __u32 duration = 0, retval; - struct bpf_map *data_map; - const int zero = 0; - u64 result[6]; + __u64 *result; - err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, - &pkt_obj, &pkt_fd); - if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) - return; - err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd); - if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) - goto close_prog; + fexit_skel = fexit_test__open_and_load(); + if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) + goto cleanup; - for (i = 0; i < 6; i++) { - prog_name[sizeof(prog_name) - 2] = '1' + i; - prog[i] = bpf_object__find_program_by_title(obj, prog_name); - if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name)) - goto close_prog; - link[i] = bpf_program__attach_trace(prog[i]); - if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) - goto close_prog; - } - data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss"); - if (CHECK(!data_map, "find_data_map", "data map not found\n")) - goto close_prog; + err = fexit_test__attach(fexit_skel); + if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) + goto cleanup; - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fexit_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", + CHECK(err || retval, "test_run", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); - err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); - if (CHECK(err, "get_result", - "failed to get output data: %d\n", err)) - goto close_prog; - - for (i = 0; i < 6; i++) - if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", - i + 1, result[i])) - goto close_prog; + result = (__u64 *)fexit_skel->bss; + for (i = 0; i < 6; i++) { + if (CHECK(result[i] != 1, "result", + "fexit_test%d failed err %lld\n", i + 1, result[i])) + goto cleanup; + } -close_prog: - for (i = 0; i < 6; i++) - if (!IS_ERR_OR_NULL(link[i])) - bpf_link__destroy(link[i]); - bpf_object__close(obj); - bpf_object__close(pkt_obj); +cleanup: + fexit_test__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c new file mode 100644 index 000000000000..a743288cf384 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <sys/stat.h> + +#include "test_link_pinning.skel.h" + +static int duration = 0; + +void test_link_pinning_subtest(struct bpf_program *prog, + struct test_link_pinning__bss *bss) +{ + const char *link_pin_path = "/sys/fs/bpf/pinned_link_test"; + struct stat statbuf = {}; + struct bpf_link *link; + int err, i; + + link = bpf_program__attach(prog); + if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link))) + goto cleanup; + + bss->in = 1; + usleep(1); + CHECK(bss->out != 1, "res_check1", "exp %d, got %d\n", 1, bss->out); + + /* pin link */ + err = bpf_link__pin(link, link_pin_path); + if (CHECK(err, "link_pin", "err: %d\n", err)) + goto cleanup; + + CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path1", + "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link)); + + /* check that link was pinned */ + err = stat(link_pin_path, &statbuf); + if (CHECK(err, "stat_link", "err %d errno %d\n", err, errno)) + goto cleanup; + + bss->in = 2; + usleep(1); + CHECK(bss->out != 2, "res_check2", "exp %d, got %d\n", 2, bss->out); + + /* destroy link, pinned link should keep program attached */ + bpf_link__destroy(link); + link = NULL; + + bss->in = 3; + usleep(1); + CHECK(bss->out != 3, "res_check3", "exp %d, got %d\n", 3, bss->out); + + /* re-open link from BPFFS */ + link = bpf_link__open(link_pin_path); + if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link))) + goto cleanup; + + CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2", + "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link)); + + /* unpin link from BPFFS, program still attached */ + err = bpf_link__unpin(link); + if (CHECK(err, "link_unpin", "err: %d\n", err)) + goto cleanup; + + /* still active, as we have FD open now */ + bss->in = 4; + usleep(1); + CHECK(bss->out != 4, "res_check4", "exp %d, got %d\n", 4, bss->out); + + bpf_link__destroy(link); + link = NULL; + + /* Validate it's finally detached. + * Actual detachment might get delayed a bit, so there is no reliable + * way to validate it immediately here, let's count up for long enough + * and see if eventually output stops being updated + */ + for (i = 5; i < 10000; i++) { + bss->in = i; + usleep(1); + if (bss->out == i - 1) + break; + } + CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i); + +cleanup: + if (!IS_ERR(link)) + bpf_link__destroy(link); +} + +void test_link_pinning(void) +{ + struct test_link_pinning* skel; + + skel = test_link_pinning__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + if (test__start_subtest("pin_raw_tp")) + test_link_pinning_subtest(skel->progs.raw_tp_prog, skel->bss); + if (test__start_subtest("pin_tp_btf")) + test_link_pinning_subtest(skel->progs.tp_btf_prog, skel->bss); + + test_link_pinning__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c new file mode 100644 index 000000000000..97fec70c600b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include <test_progs.h> +#include "modify_return.skel.h" + +#define LOWER(x) ((x) & 0xffff) +#define UPPER(x) ((x) >> 16) + + +static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) +{ + struct modify_return *skel = NULL; + int err, prog_fd; + __u32 duration = 0, retval; + __u16 side_effect; + __s16 ret; + + skel = modify_return__open_and_load(); + if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n")) + goto cleanup; + + err = modify_return__attach(skel); + if (CHECK(err, "modify_return", "attach failed: %d\n", err)) + goto cleanup; + + skel->bss->input_retval = input_retval; + prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0, + &retval, &duration); + + CHECK(err, "test_run", "err %d errno %d\n", err, errno); + + side_effect = UPPER(retval); + ret = LOWER(retval); + + CHECK(ret != want_ret, "test_run", + "unexpected ret: %d, expected: %d\n", ret, want_ret); + CHECK(side_effect != want_side_effect, "modify_return", + "unexpected side_effect: %d\n", side_effect); + + CHECK(skel->bss->fentry_result != 1, "modify_return", + "fentry failed\n"); + CHECK(skel->bss->fexit_result != 1, "modify_return", + "fexit failed\n"); + CHECK(skel->bss->fmod_ret_result != 1, "modify_return", + "fmod_ret failed\n"); + +cleanup: + modify_return__destroy(skel); +} + +void test_modify_return(void) +{ + run_test(0 /* input_retval */, + 1 /* want_side_effect */, + 4 /* want_ret */); + run_test(-EINVAL /* input_retval */, + 0 /* want_side_effect */, + -EINVAL /* want_ret */); +} + diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c new file mode 100644 index 000000000000..542240e16564 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */ +#include <test_progs.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/syscall.h> + +struct bss { + __u64 dev; + __u64 ino; + __u64 pid_tgid; + __u64 user_pid_tgid; +}; + +void test_ns_current_pid_tgid(void) +{ + const char *probe_name = "raw_tracepoint/sys_enter"; + const char *file = "test_ns_current_pid_tgid.o"; + int err, key = 0, duration = 0; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct bpf_object *obj; + struct bss bss; + struct stat st; + __u64 id; + + obj = bpf_object__open_file(file, NULL); + if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) + return; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) + goto cleanup; + + bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss"); + if (CHECK(!bss_map, "find_bss_map", "failed\n")) + goto cleanup; + + prog = bpf_object__find_program_by_title(obj, probe_name); + if (CHECK(!prog, "find_prog", "prog '%s' not found\n", + probe_name)) + goto cleanup; + + memset(&bss, 0, sizeof(bss)); + pid_t tid = syscall(SYS_gettid); + pid_t pid = getpid(); + + id = (__u64) tid << 32 | pid; + bss.user_pid_tgid = id; + + if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) { + perror("Failed to stat /proc/self/ns/pid"); + goto cleanup; + } + + bss.dev = st.st_dev; + bss.ino = st.st_ino; + + err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0); + if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err)) + goto cleanup; + + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); + if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", + PTR_ERR(link))) { + link = NULL; + goto cleanup; + } + + /* trigger some syscalls */ + usleep(1); + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss); + if (CHECK(err, "set_bss", "failed to get bss : %d\n", err)) + goto cleanup; + + if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid", + "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) + goto cleanup; +cleanup: + if (!link) { + bpf_link__destroy(link); + link = NULL; + } + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index a1dd13b34d4b..821b4146b7b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -805,12 +805,6 @@ static void test_config(int sotype, sa_family_t family, bool inany) char s[MAX_TEST_NAME]; const struct test *t; - /* SOCKMAP/SOCKHASH don't support UDP yet */ - if (sotype == SOCK_DGRAM && - (inner_map_type == BPF_MAP_TYPE_SOCKMAP || - inner_map_type == BPF_MAP_TYPE_SOCKHASH)) - return; - for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { if (t->need_sotype && t->need_sotype != sotype) continue; /* test not compatible with socket type */ diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index c6d6b685a946..4538bd08203f 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -14,6 +14,7 @@ void test_skb_ctx(void) .wire_len = 100, .gso_segs = 8, .mark = 9, + .gso_size = 10, }; struct bpf_prog_test_run_attr tattr = { .data_in = &pkt_v4, diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index b1b2acea0638..d7d65a700799 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -16,6 +16,7 @@ #include <pthread.h> #include <stdlib.h> #include <string.h> +#include <sys/select.h> #include <unistd.h> #include <bpf/bpf.h> @@ -25,6 +26,7 @@ #include "test_progs.h" #include "test_sockmap_listen.skel.h" +#define IO_TIMEOUT_SEC 30 #define MAX_STRERR_LEN 256 #define MAX_TEST_NAME 80 @@ -44,9 +46,10 @@ /* Wrappers that fail the test on error and report it. */ -#define xaccept(fd, addr, len) \ +#define xaccept_nonblock(fd, addr, len) \ ({ \ - int __ret = accept((fd), (addr), (len)); \ + int __ret = \ + accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ if (__ret == -1) \ FAIL_ERRNO("accept"); \ __ret; \ @@ -108,6 +111,23 @@ __ret; \ }) +#define xsend(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = send((fd), (buf), (len), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("send"); \ + __ret; \ + }) + +#define xrecv_nonblock(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ + IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("recv"); \ + __ret; \ + }) + #define xsocket(family, sotype, flags) \ ({ \ int __ret = socket(family, sotype, flags); \ @@ -175,6 +195,40 @@ __ret; \ }) +static int poll_read(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set rfds; + int r; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + + r = select(fd + 1, &rfds, NULL, NULL, &timeout); + if (r == 0) + errno = ETIME; + + return r == 1 ? 0 : -1; +} + +static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return accept(fd, addr, len); +} + +static int recv_timeout(int fd, void *buf, size_t len, int flags, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return recv(fd, buf, len, flags); +} + static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len) { struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); @@ -230,7 +284,7 @@ static int enable_reuseport(int s, int progfd) return 0; } -static int listen_loopback_reuseport(int family, int sotype, int progfd) +static int socket_loopback_reuseport(int family, int sotype, int progfd) { struct sockaddr_storage addr; socklen_t len; @@ -249,6 +303,9 @@ static int listen_loopback_reuseport(int family, int sotype, int progfd) if (err) goto close; + if (sotype & SOCK_DGRAM) + return s; + err = xlisten(s, SOMAXCONN); if (err) goto close; @@ -259,9 +316,9 @@ close: return -1; } -static int listen_loopback(int family, int sotype) +static int socket_loopback(int family, int sotype) { - return listen_loopback_reuseport(family, sotype, -1); + return socket_loopback_reuseport(family, sotype, -1); } static void test_insert_invalid(int family, int sotype, int mapfd) @@ -327,13 +384,13 @@ close: xclose(s); } -static void test_insert_listening(int family, int sotype, int mapfd) +static void test_insert(int family, int sotype, int mapfd) { u64 value; u32 key; int s; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -349,7 +406,7 @@ static void test_delete_after_insert(int family, int sotype, int mapfd) u32 key; int s; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -366,7 +423,7 @@ static void test_delete_after_close(int family, int sotype, int mapfd) u64 value; u32 key; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -390,7 +447,7 @@ static void test_lookup_after_insert(int family, int sotype, int mapfd) u32 key; int s; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -417,7 +474,7 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd) u64 value; u32 key; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -439,7 +496,7 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd) u32 key, value32; int err, s; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -464,17 +521,17 @@ close: xclose(s); } -static void test_update_listening(int family, int sotype, int mapfd) +static void test_update_existing(int family, int sotype, int mapfd) { int s1, s2; u64 value; u32 key; - s1 = listen_loopback(family, sotype); + s1 = socket_loopback(family, sotype); if (s1 < 0) return; - s2 = listen_loopback(family, sotype); + s2 = socket_loopback(family, sotype); if (s2 < 0) goto close_s1; @@ -500,7 +557,7 @@ static void test_destroy_orphan_child(int family, int sotype, int mapfd) u64 value; u32 key; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -534,7 +591,7 @@ static void test_clone_after_delete(int family, int sotype, int mapfd) u64 value; u32 key; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -570,7 +627,7 @@ static void test_accept_after_delete(int family, int sotype, int mapfd) socklen_t len; u64 value; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype | SOCK_NONBLOCK); if (s == -1) return; @@ -598,7 +655,7 @@ static void test_accept_after_delete(int family, int sotype, int mapfd) if (err) goto close_cli; - p = xaccept(s, NULL, NULL); + p = xaccept_nonblock(s, NULL, NULL); if (p == -1) goto close_cli; @@ -624,7 +681,7 @@ static void test_accept_before_delete(int family, int sotype, int mapfd) socklen_t len; u64 value; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype | SOCK_NONBLOCK); if (s == -1) return; @@ -647,7 +704,7 @@ static void test_accept_before_delete(int family, int sotype, int mapfd) if (err) goto close_cli; - p = xaccept(s, NULL, NULL); + p = xaccept_nonblock(s, NULL, NULL); if (p == -1) goto close_cli; @@ -711,7 +768,7 @@ static void *connect_accept_thread(void *arg) break; } - p = xaccept(s, NULL, NULL); + p = xaccept_nonblock(s, NULL, NULL); if (p < 0) { xclose(c); break; @@ -735,7 +792,7 @@ static void test_syn_recv_insert_delete(int family, int sotype, int mapfd) int err, s; u64 value; - s = listen_loopback(family, sotype | SOCK_NONBLOCK); + s = socket_loopback(family, sotype | SOCK_NONBLOCK); if (s < 0) return; @@ -877,7 +934,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, zero_verdict_count(verd_mapfd); - s = listen_loopback(family, sotype | SOCK_NONBLOCK); + s = socket_loopback(family, sotype | SOCK_NONBLOCK); if (s < 0) return; @@ -893,7 +950,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, if (err) goto close_cli0; - p0 = xaccept(s, NULL, NULL); + p0 = xaccept_nonblock(s, NULL, NULL); if (p0 < 0) goto close_cli0; @@ -904,7 +961,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, if (err) goto close_cli1; - p1 = xaccept(s, NULL, NULL); + p1 = xaccept_nonblock(s, NULL, NULL); if (p1 < 0) goto close_cli1; @@ -1009,7 +1066,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd, zero_verdict_count(verd_mapfd); - s = listen_loopback(family, sotype | SOCK_NONBLOCK); + s = socket_loopback(family, sotype | SOCK_NONBLOCK); if (s < 0) return; @@ -1025,7 +1082,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd, if (err) goto close_cli; - p = xaccept(s, NULL, NULL); + p = xaccept_nonblock(s, NULL, NULL); if (p < 0) goto close_cli; @@ -1113,14 +1170,15 @@ static void test_reuseport_select_listening(int family, int sotype, { struct sockaddr_storage addr; unsigned int pass; - int s, c, p, err; + int s, c, err; socklen_t len; u64 value; u32 key; zero_verdict_count(verd_map); - s = listen_loopback_reuseport(family, sotype, reuseport_prog); + s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK, + reuseport_prog); if (s < 0) return; @@ -1142,19 +1200,33 @@ static void test_reuseport_select_listening(int family, int sotype, if (err) goto close_cli; - p = xaccept(s, NULL, NULL); - if (p < 0) - goto close_cli; + if (sotype == SOCK_STREAM) { + int p; + + p = xaccept_nonblock(s, NULL, NULL); + if (p < 0) + goto close_cli; + xclose(p); + } else { + char b = 'a'; + ssize_t n; + + n = xsend(c, &b, sizeof(b), 0); + if (n == -1) + goto close_cli; + + n = xrecv_nonblock(s, &b, sizeof(b), 0); + if (n == -1) + goto close_cli; + } key = SK_PASS; err = xbpf_map_lookup_elem(verd_map, &key, &pass); if (err) - goto close_peer; + goto close_cli; if (pass != 1) FAIL("want pass count 1, have %d", pass); -close_peer: - xclose(p); close_cli: xclose(c); close_srv: @@ -1174,7 +1246,7 @@ static void test_reuseport_select_connected(int family, int sotype, zero_verdict_count(verd_map); - s = listen_loopback_reuseport(family, sotype, reuseport_prog); + s = socket_loopback_reuseport(family, sotype, reuseport_prog); if (s < 0) return; @@ -1198,9 +1270,24 @@ static void test_reuseport_select_connected(int family, int sotype, if (err) goto close_cli0; - p0 = xaccept(s, NULL, NULL); - if (err) - goto close_cli0; + if (sotype == SOCK_STREAM) { + p0 = xaccept_nonblock(s, NULL, NULL); + if (p0 < 0) + goto close_cli0; + } else { + p0 = xsocket(family, sotype, 0); + if (p0 < 0) + goto close_cli0; + + len = sizeof(addr); + err = xgetsockname(c0, sockaddr(&addr), &len); + if (err) + goto close_cli0; + + err = xconnect(p0, sockaddr(&addr), len); + if (err) + goto close_cli0; + } /* Update sock_map[0] to redirect to a connected socket */ key = 0; @@ -1213,8 +1300,24 @@ static void test_reuseport_select_connected(int family, int sotype, if (c1 < 0) goto close_peer0; + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + errno = 0; err = connect(c1, sockaddr(&addr), len); + if (sotype == SOCK_DGRAM) { + char b = 'a'; + ssize_t n; + + n = xsend(c1, &b, sizeof(b), 0); + if (n == -1) + goto close_cli1; + + n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC); + err = n == -1; + } if (!err || errno != ECONNREFUSED) FAIL_ERRNO("connect: expected ECONNREFUSED"); @@ -1249,11 +1352,11 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, zero_verdict_count(verd_map); /* Create two listeners, each in its own reuseport group */ - s1 = listen_loopback_reuseport(family, sotype, reuseport_prog); + s1 = socket_loopback_reuseport(family, sotype, reuseport_prog); if (s1 < 0) return; - s2 = listen_loopback_reuseport(family, sotype, reuseport_prog); + s2 = socket_loopback_reuseport(family, sotype, reuseport_prog); if (s2 < 0) goto close_srv1; @@ -1278,7 +1381,18 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, goto close_srv2; err = connect(c, sockaddr(&addr), len); - if (err && errno != ECONNREFUSED) { + if (sotype == SOCK_DGRAM) { + char b = 'a'; + ssize_t n; + + n = xsend(c, &b, sizeof(b), 0); + if (n == -1) + goto close_cli; + + n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC); + err = n == -1; + } + if (!err || errno != ECONNREFUSED) { FAIL_ERRNO("connect: expected ECONNREFUSED"); goto close_cli; } @@ -1299,9 +1413,9 @@ close_srv1: xclose(s1); } -#define TEST(fn) \ +#define TEST(fn, ...) \ { \ - fn, #fn \ + fn, #fn, __VA_ARGS__ \ } static void test_ops_cleanup(const struct bpf_map *map) @@ -1350,18 +1464,31 @@ static const char *map_type_str(const struct bpf_map *map) } } +static const char *sotype_str(int sotype) +{ + switch (sotype) { + case SOCK_DGRAM: + return "UDP"; + case SOCK_STREAM: + return "TCP"; + default: + return "unknown"; + } +} + static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, int family, int sotype) { const struct op_test { void (*fn)(int family, int sotype, int mapfd); const char *name; + int sotype; } tests[] = { /* insert */ TEST(test_insert_invalid), TEST(test_insert_opened), - TEST(test_insert_bound), - TEST(test_insert_listening), + TEST(test_insert_bound, SOCK_STREAM), + TEST(test_insert), /* delete */ TEST(test_delete_after_insert), TEST(test_delete_after_close), @@ -1370,28 +1497,32 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, TEST(test_lookup_after_delete), TEST(test_lookup_32_bit_value), /* update */ - TEST(test_update_listening), + TEST(test_update_existing), /* races with insert/delete */ - TEST(test_destroy_orphan_child), - TEST(test_syn_recv_insert_delete), - TEST(test_race_insert_listen), + TEST(test_destroy_orphan_child, SOCK_STREAM), + TEST(test_syn_recv_insert_delete, SOCK_STREAM), + TEST(test_race_insert_listen, SOCK_STREAM), /* child clone */ - TEST(test_clone_after_delete), - TEST(test_accept_after_delete), - TEST(test_accept_before_delete), + TEST(test_clone_after_delete, SOCK_STREAM), + TEST(test_accept_after_delete, SOCK_STREAM), + TEST(test_accept_before_delete, SOCK_STREAM), }; - const char *family_name, *map_name; + const char *family_name, *map_name, *sotype_name; const struct op_test *t; char s[MAX_TEST_NAME]; int map_fd; family_name = family_str(family); map_name = map_type_str(map); + sotype_name = sotype_str(sotype); map_fd = bpf_map__fd(map); for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, - t->name); + snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, + sotype_name, t->name); + + if (t->sotype != 0 && t->sotype != sotype) + continue; if (!test__start_subtest(s)) continue; @@ -1424,6 +1555,7 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, t->name); + if (!test__start_subtest(s)) continue; @@ -1438,26 +1570,31 @@ static void test_reuseport(struct test_sockmap_listen *skel, void (*fn)(int family, int sotype, int socket_map, int verdict_map, int reuseport_prog); const char *name; + int sotype; } tests[] = { TEST(test_reuseport_select_listening), TEST(test_reuseport_select_connected), TEST(test_reuseport_mixed_groups), }; int socket_map, verdict_map, reuseport_prog; - const char *family_name, *map_name; + const char *family_name, *map_name, *sotype_name; const struct reuseport_test *t; char s[MAX_TEST_NAME]; family_name = family_str(family); map_name = map_type_str(map); + sotype_name = sotype_str(sotype); socket_map = bpf_map__fd(map); verdict_map = bpf_map__fd(skel->maps.verdict_map); reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport); for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, - t->name); + snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, + sotype_name, t->name); + + if (t->sotype != 0 && t->sotype != sotype) + continue; if (!test__start_subtest(s)) continue; @@ -1470,8 +1607,10 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, int family) { test_ops(skel, map, family, SOCK_STREAM); + test_ops(skel, map, family, SOCK_DGRAM); test_redir(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, family, SOCK_STREAM); + test_reuseport(skel, map, family, SOCK_DGRAM); } void test_sockmap_listen(void) diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index f4cd60d6fba2..e08f6bb17700 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -188,7 +188,7 @@ static int start_server(void) }; int fd; - fd = socket(AF_INET, SOCK_STREAM, 0); + fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); if (fd < 0) { log_err("Failed to create server socket"); return -1; @@ -205,6 +205,7 @@ static int start_server(void) static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER; +static volatile bool server_done = false; static void *server_thread(void *arg) { @@ -222,23 +223,24 @@ static void *server_thread(void *arg) if (CHECK_FAIL(err < 0)) { perror("Failed to listed on socket"); - return NULL; + return ERR_PTR(err); } - client_fd = accept(fd, (struct sockaddr *)&addr, &len); + while (!server_done) { + client_fd = accept(fd, (struct sockaddr *)&addr, &len); + if (client_fd == -1 && errno == EAGAIN) { + usleep(50); + continue; + } + break; + } if (CHECK_FAIL(client_fd < 0)) { perror("Failed to accept client"); - return NULL; + return ERR_PTR(err); } - /* Wait for the next connection (that never arrives) - * to keep this thread alive to prevent calling - * close() on client_fd. - */ - if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) { - perror("Unexpected success in second accept"); - return NULL; - } + while (!server_done) + usleep(50); close(client_fd); @@ -249,6 +251,7 @@ void test_tcp_rtt(void) { int server_fd, cgroup_fd; pthread_t tid; + void *server_res; cgroup_fd = test__join_cgroup("/tcp_rtt"); if (CHECK_FAIL(cgroup_fd < 0)) @@ -267,6 +270,11 @@ void test_tcp_rtt(void) pthread_mutex_unlock(&server_started_mtx); CHECK_FAIL(run_test(cgroup_fd, server_fd)); + + server_done = true; + pthread_join(tid, &server_res); + CHECK_FAIL(IS_ERR(server_res)); + close_server_fd: close(server_fd); close_cgroup_fd: diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c new file mode 100644 index 000000000000..04939eda1325 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <time.h> +#include "test_vmlinux.skel.h" + +#define MY_TV_NSEC 1337 + +static void nsleep() +{ + struct timespec ts = { .tv_nsec = MY_TV_NSEC }; + + (void)nanosleep(&ts, NULL); +} + +void test_vmlinux(void) +{ + int duration = 0, err; + struct test_vmlinux* skel; + struct test_vmlinux__bss *bss; + + skel = test_vmlinux__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + + err = test_vmlinux__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger everything */ + nsleep(); + + CHECK(!bss->tp_called, "tp", "not called\n"); + CHECK(!bss->raw_tp_called, "raw_tp", "not called\n"); + CHECK(!bss->tp_btf_called, "tp_btf", "not called\n"); + CHECK(!bss->kprobe_called, "kprobe", "not called\n"); + CHECK(!bss->fentry_called, "fentry", "not called\n"); + +cleanup: + test_vmlinux__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index 4ba011031d4c..a0f688c37023 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -4,17 +4,51 @@ #include "test_xdp.skel.h" #include "test_xdp_bpf2bpf.skel.h" +struct meta { + int ifindex; + int pkt_len; +}; + +static void on_sample(void *ctx, int cpu, void *data, __u32 size) +{ + int duration = 0; + struct meta *meta = (struct meta *)data; + struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta); + + if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta), + "check_size", "size %u < %zu\n", + size, sizeof(pkt_v4) + sizeof(*meta))) + return; + + if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex", + "meta->ifindex = %d\n", meta->ifindex)) + return; + + if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len", + "meta->pkt_len = %zd\n", sizeof(pkt_v4))) + return; + + if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), + "check_packet_content", "content not the same\n")) + return; + + *(bool *)ctx = true; +} + void test_xdp_bpf2bpf(void) { __u32 duration = 0, retval, size; char buf[128]; int err, pkt_fd, map_fd; + bool passed = false; struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); struct iptnl_info value4 = {.family = AF_INET}; struct test_xdp *pkt_skel = NULL; struct test_xdp_bpf2bpf *ftrace_skel = NULL; struct vip key4 = {.protocol = 6, .family = AF_INET}; struct bpf_program *prog; + struct perf_buffer *pb = NULL; + struct perf_buffer_opts pb_opts = {}; /* Load XDP program to introspect */ pkt_skel = test_xdp__open_and_load(); @@ -50,6 +84,14 @@ void test_xdp_bpf2bpf(void) if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err)) goto out; + /* Set up perf buffer */ + pb_opts.sample_cb = on_sample; + pb_opts.ctx = &passed; + pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), + 1, &pb_opts); + if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + goto out; + /* Run test program */ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); @@ -60,6 +102,15 @@ void test_xdp_bpf2bpf(void) err, errno, retval, size)) goto out; + /* Make sure bpf_xdp_output() was triggered and it sent the expected + * data to the perf ring buffer. + */ + err = perf_buffer__poll(pb, 100); + if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) + goto out; + + CHECK_FAIL(!passed); + /* Verify test results */ if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"), "result", "fentry failed err %llu\n", @@ -70,6 +121,8 @@ void test_xdp_bpf2bpf(void) "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit); out: + if (pb) + perf_buffer__free(pb); test_xdp__destroy(pkt_skel); test_xdp_bpf2bpf__destroy(ftrace_skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index b631fb5032d2..127ea762a062 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -9,7 +9,7 @@ #include <linux/bpf.h> #include <linux/types.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> #include "bpf_tcp_helpers.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index d4a02fe44a12..31975c96e2c9 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -13,7 +13,7 @@ enum e1 { enum e2 { C = 100, - D = -100, + D = 4294967295, E = 0, }; diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c index 38d3a82144ca..9365b686f84b 100644 --- a/tools/testing/selftests/bpf/progs/fentry_test.c +++ b/tools/testing/selftests/bpf/progs/fentry_test.c @@ -2,7 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index c329fccf9842..98e1efe14549 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -5,7 +5,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> struct sk_buff { unsigned int len; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c index 92f3fa47cf40..85c0b516d6ee 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c @@ -2,7 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> struct sk_buff { unsigned int len; diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c index 348109b9ea07..bd1e17d8024c 100644 --- a/tools/testing/selftests/bpf/progs/fexit_test.c +++ b/tools/testing/selftests/bpf/progs/fexit_test.c @@ -2,7 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c index 8f48a909f079..a46a264ce24e 100644 --- a/tools/testing/selftests/bpf/progs/kfree_skb.c +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -4,7 +4,7 @@ #include <stdbool.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; struct { diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c new file mode 100644 index 000000000000..8b7466a15c6b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/modify_return.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +static int sequence = 0; +__s32 input_retval = 0; + +__u64 fentry_result = 0; +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(fentry_test, int a, __u64 b) +{ + sequence++; + fentry_result = (sequence == 1); + return 0; +} + +__u64 fmod_ret_result = 0; +SEC("fmod_ret/bpf_modify_return_test") +int BPF_PROG(fmod_ret_test, int a, int *b, int ret) +{ + sequence++; + /* This is the first fmod_ret program, the ret passed should be 0 */ + fmod_ret_result = (sequence == 2 && ret == 0); + return input_retval; +} + +__u64 fexit_result = 0; +SEC("fexit/bpf_modify_return_test") +int BPF_PROG(fexit_test, int a, __u64 b, int ret) +{ + sequence++; + /* If the input_reval is non-zero a successful modification should have + * occurred. + */ + if (input_retval) + fexit_result = (sequence == 3 && ret == input_retval); + else + fexit_result = (sequence == 3 && ret == 4); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index dd8fae6660ab..8056a4c6d918 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -4,6 +4,7 @@ #include <linux/ptrace.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> int kprobe_res = 0; int kretprobe_res = 0; @@ -18,7 +19,7 @@ int handle_kprobe(struct pt_regs *ctx) } SEC("kretprobe/sys_nanosleep") -int handle_kretprobe(struct pt_regs *ctx) +int BPF_KRETPROBE(handle_kretprobe) { kretprobe_res = 2; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_link_pinning.c b/tools/testing/selftests/bpf/progs/test_link_pinning.c new file mode 100644 index 000000000000..bbf2a5264dc0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_link_pinning.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <stdbool.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +int in = 0; +int out = 0; + +SEC("raw_tp/sys_enter") +int raw_tp_prog(const void *ctx) +{ + out = in; + return 0; +} + +SEC("tp_btf/sys_enter") +int tp_btf_prog(const void *ctx) +{ + out = in; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c new file mode 100644 index 000000000000..1dca70a6de2f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Carlos Neira cneirabustos@gmail.com */ + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> + +static volatile struct { + __u64 dev; + __u64 ino; + __u64 pid_tgid; + __u64 user_pid_tgid; +} res; + +SEC("raw_tracepoint/sys_enter") +int trace(void *ctx) +{ + __u64 ns_pid_tgid, expected_pid; + struct bpf_pidns_info nsdata; + __u32 key = 0; + + if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata, + sizeof(struct bpf_pidns_info))) + return 0; + + ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid; + expected_pid = res.user_pid_tgid; + + if (expected_pid != ns_pid_tgid) + return 0; + + res.pid_tgid = ns_pid_tgid; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index bfe9fbcb9684..56a50b25cd33 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -6,7 +6,6 @@ #include <linux/ptrace.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "bpf_trace_helpers.h" struct task_struct; @@ -17,11 +16,9 @@ int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec) } SEC("kretprobe/__set_task_comm") -int BPF_KRETPROBE(prog2, - struct task_struct *tsk, const char *buf, bool exec, - int ret) +int BPF_KRETPROBE(prog2, int ret) { - return !PT_REGS_PARM1(ctx) && ret; + return ret; } SEC("raw_tp/task_rename") diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c index 0f7e27d97567..a1ccc831c882 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_branches.c +++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c @@ -5,7 +5,7 @@ #include <linux/ptrace.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> int valid = 0; int required_size_out = 0; diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c index ebfcc9f50c35..ad59c4c9aba8 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c +++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c @@ -4,7 +4,7 @@ #include <linux/ptrace.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index d556b1572cc6..89b3532ccc75 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -7,7 +7,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "bpf_trace_helpers.h" static struct sockaddr_in old; diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index 202de3938494..b02ea589ce7e 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -23,6 +23,8 @@ int process(struct __sk_buff *skb) return 1; if (skb->gso_segs != 8) return 1; + if (skb->gso_size != 10) + return 1; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c index e51e6e3a81c2..f030e469d05b 100644 --- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c +++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c @@ -2,7 +2,8 @@ #include <stdbool.h> #include <stddef.h> #include <linux/bpf.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> struct task_struct; diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c new file mode 100644 index 000000000000..5611b564d3b1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include <asm/unistd.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +#define MY_TV_NSEC 1337 + +bool tp_called = false; +bool raw_tp_called = false; +bool tp_btf_called = false; +bool kprobe_called = false; +bool fentry_called = false; + +SEC("tp/syscalls/sys_enter_nanosleep") +int handle__tp(struct trace_event_raw_sys_enter *args) +{ + struct __kernel_timespec *ts; + + if (args->id != __NR_nanosleep) + return 0; + + ts = (void *)args->args[0]; + if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + return 0; + + tp_called = true; + return 0; +} + +SEC("raw_tp/sys_enter") +int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id) +{ + struct __kernel_timespec *ts; + + if (id != __NR_nanosleep) + return 0; + + ts = (void *)PT_REGS_PARM1_CORE(regs); + if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + return 0; + + raw_tp_called = true; + return 0; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id) +{ + struct __kernel_timespec *ts; + + if (id != __NR_nanosleep) + return 0; + + ts = (void *)PT_REGS_PARM1_CORE(regs); + if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + return 0; + + tp_btf_called = true; + return 0; +} + +SEC("kprobe/hrtimer_nanosleep") +int BPF_KPROBE(handle__kprobe, + ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +{ + if (rqtp == MY_TV_NSEC) + kprobe_called = true; + return 0; +} + +SEC("fentry/hrtimer_nanosleep") +int BPF_PROG(handle__fentry, + ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +{ + if (rqtp == MY_TV_NSEC) + fentry_called = true; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index b840fc9e3ed5..a038e827f850 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> +#include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" + +char _license[] SEC("license") = "GPL"; struct net_device { /* Structure does not need to contain all entries, @@ -27,10 +29,32 @@ struct xdp_buff { struct xdp_rxq_info *rxq; } __attribute__((preserve_access_index)); +struct meta { + int ifindex; + int pkt_len; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} perf_buf_map SEC(".maps"); + __u64 test_result_fentry = 0; SEC("fentry/FUNC") int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) { + struct meta meta; + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + + meta.ifindex = xdp->rxq->dev->ifindex; + meta.pkt_len = data_end - data; + bpf_xdp_output(xdp, &perf_buf_map, + ((__u64) meta.pkt_len << 32) | + BPF_F_CURRENT_CPU, + &meta, sizeof(meta)); + test_result_fentry = xdp->rxq->dev->ifindex; return 0; } diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c new file mode 100644 index 000000000000..ed253f252cd0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */ +#define _GNU_SOURCE +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sched.h> +#include <sys/wait.h> +#include <sys/mount.h> +#include "test_progs.h" + +#define CHECK_NEWNS(condition, tag, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("%s:FAIL:%s ", __func__, tag); \ + printf(format); \ + } else { \ + printf("%s:PASS:%s\n", __func__, tag); \ + } \ + __ret; \ +}) + +struct bss { + __u64 dev; + __u64 ino; + __u64 pid_tgid; + __u64 user_pid_tgid; +}; + +int main(int argc, char **argv) +{ + pid_t pid; + int exit_code = 1; + struct stat st; + + printf("Testing bpf_get_ns_current_pid_tgid helper in new ns\n"); + + if (stat("/proc/self/ns/pid", &st)) { + perror("stat failed on /proc/self/ns/pid ns\n"); + printf("%s:FAILED\n", argv[0]); + return exit_code; + } + + if (CHECK_NEWNS(unshare(CLONE_NEWPID | CLONE_NEWNS), + "unshare CLONE_NEWPID | CLONE_NEWNS", "error errno=%d\n", errno)) + return exit_code; + + pid = fork(); + if (pid == -1) { + perror("Fork() failed\n"); + printf("%s:FAILED\n", argv[0]); + return exit_code; + } + + if (pid > 0) { + int status; + + usleep(5); + waitpid(pid, &status, 0); + return 0; + } else { + + pid = fork(); + if (pid == -1) { + perror("Fork() failed\n"); + printf("%s:FAILED\n", argv[0]); + return exit_code; + } + + if (pid > 0) { + int status; + waitpid(pid, &status, 0); + return 0; + } else { + if (CHECK_NEWNS(mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL), + "Unmounting proc", "Cannot umount proc! errno=%d\n", errno)) + return exit_code; + + if (CHECK_NEWNS(mount("proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL), + "Mounting proc", "Cannot mount proc! errno=%d\n", errno)) + return exit_code; + + const char *probe_name = "raw_tracepoint/sys_enter"; + const char *file = "test_ns_current_pid_tgid.o"; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct bpf_object *obj; + int exit_code = 1; + int err, key = 0; + struct bss bss; + struct stat st; + __u64 id; + + obj = bpf_object__open_file(file, NULL); + if (CHECK_NEWNS(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) + return exit_code; + + err = bpf_object__load(obj); + if (CHECK_NEWNS(err, "obj_load", "err %d errno %d\n", err, errno)) + goto cleanup; + + bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss"); + if (CHECK_NEWNS(!bss_map, "find_bss_map", "failed\n")) + goto cleanup; + + prog = bpf_object__find_program_by_title(obj, probe_name); + if (CHECK_NEWNS(!prog, "find_prog", "prog '%s' not found\n", + probe_name)) + goto cleanup; + + memset(&bss, 0, sizeof(bss)); + pid_t tid = syscall(SYS_gettid); + pid_t pid = getpid(); + + id = (__u64) tid << 32 | pid; + bss.user_pid_tgid = id; + + if (CHECK_NEWNS(stat("/proc/self/ns/pid", &st), + "stat new ns", "Failed to stat /proc/self/ns/pid errno=%d\n", errno)) + goto cleanup; + + bss.dev = st.st_dev; + bss.ino = st.st_ino; + + err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0); + if (CHECK_NEWNS(err, "setting_bss", "failed to set bss : %d\n", err)) + goto cleanup; + + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); + if (CHECK_NEWNS(IS_ERR(link), "attach_raw_tp", "err %ld\n", + PTR_ERR(link))) { + link = NULL; + goto cleanup; + } + + /* trigger some syscalls */ + usleep(1); + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss); + if (CHECK_NEWNS(err, "set_bss", "failed to get bss : %d\n", err)) + goto cleanup; + + if (CHECK_NEWNS(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid", + "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) + goto cleanup; + + exit_code = 0; + printf("%s:PASS\n", argv[0]); +cleanup: + if (!link) { + bpf_link__destroy(link); + link = NULL; + } + bpf_object__close(obj); + } + } +} diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index a969c77e9456..f85a06512541 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -29,6 +29,24 @@ struct prog_test_def { int old_error_cnt; }; +/* Override C runtime library's usleep() implementation to ensure nanosleep() + * is always called. Usleep is frequently used in selftests as a way to + * trigger kprobe and tracepoints. + */ +int usleep(useconds_t usec) +{ + struct timespec ts; + + if (usec > 999999) { + ts.tv_sec = usec / 1000000; + ts.tv_nsec = usec % 1000000; + } else { + ts.tv_sec = 0; + ts.tv_nsec = usec; + } + return nanosleep(&ts, NULL); +} + static bool should_run(struct test_selector *sel, int num, const char *name) { int i; @@ -198,7 +216,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) map = bpf_object__find_map_by_name(obj, name); if (!map) { - printf("%s:FAIL:map '%s' not found\n", test, name); + fprintf(stdout, "%s:FAIL:map '%s' not found\n", test, name); test__fail(); return -1; } @@ -369,7 +387,7 @@ static int libbpf_print_fn(enum libbpf_print_level level, { if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG) return 0; - vprintf(format, args); + vfprintf(stdout, format, args); return 0; } @@ -615,7 +633,7 @@ int cd_flavor_subdir(const char *exec_name) if (!flavor) return 0; flavor++; - printf("Switching to flavor '%s' subdirectory...\n", flavor); + fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor); return chdir(flavor); } @@ -698,8 +716,8 @@ int main(int argc, char **argv) cleanup_cgroup_environment(); } stdio_restore(); - printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", - env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); free(env.test_selector.blacklist.strs); free(env.test_selector.whitelist.strs); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index bcfa9ef23fda..fd85fa61dbf7 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -109,10 +109,10 @@ extern struct ipv6_packet pkt_v6; int __save_errno = errno; \ if (__ret) { \ test__fail(); \ - printf("%s:FAIL:%s ", __func__, tag); \ - printf(format); \ + fprintf(stdout, "%s:FAIL:%s ", __func__, tag); \ + fprintf(stdout, ##format); \ } else { \ - printf("%s:PASS:%s %d nsec\n", \ + fprintf(stdout, "%s:PASS:%s %d nsec\n", \ __func__, tag, duration); \ } \ errno = __save_errno; \ @@ -124,7 +124,7 @@ extern struct ipv6_packet pkt_v6; int __save_errno = errno; \ if (__ret) { \ test__fail(); \ - printf("%s:FAIL:%d\n", __func__, __LINE__); \ + fprintf(stdout, "%s:FAIL:%d\n", __func__, __LINE__); \ } \ errno = __save_errno; \ __ret; \ diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index d438193804b2..2e16b8e268f2 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -1011,6 +1011,53 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "read gso_size from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "read gso_size from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "write gso_size from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .result_unpriv = REJECT, + .errstr = "invalid bpf_context access off=176 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "read gso_size from CLS", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ "check wire_len is not readable by sockets", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 8f833678ac4d..0d347d48c112 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -389,17 +389,14 @@ check_marking() ((pct $cond)) } -do_ecn_test() +ecn_test_common() { + local name=$1; shift local vlan=$1; shift local limit=$1; shift local backlog local pct - # Main stream. - start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ - $h3_mac tos=0x01 - # Build the below-the-limit backlog using UDP. We could use TCP just # fine, but this way we get a proof that UDP is accepted when queue # length is below the limit. The main stream is using TCP, and if the @@ -409,7 +406,7 @@ do_ecn_test() check_err $? "Could not build the requested backlog" pct=$(check_marking $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." - log_test "TC $((vlan - 10)): ECN backlog < limit" + log_test "TC $((vlan - 10)): $name backlog < limit" # Now push TCP, because non-TCP traffic would be early-dropped after the # backlog crosses the limit, and we want to make sure that the backlog @@ -419,7 +416,20 @@ do_ecn_test() check_err $? "Could not build the requested backlog" pct=$(check_marking $vlan ">= 95") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95." - log_test "TC $((vlan - 10)): ECN backlog > limit" + log_test "TC $((vlan - 10)): $name backlog > limit" +} + +do_ecn_test() +{ + local vlan=$1; shift + local limit=$1; shift + local name=ECN + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + sleep 1 + + ecn_test_common "$name" $vlan $limit # Up there we saw that UDP gets accepted when backlog is below the # limit. Now that it is above, it should all get dropped, and backlog @@ -427,7 +437,31 @@ do_ecn_test() RET=0 build_backlog $vlan $((2 * limit)) udp >/dev/null check_fail $? "UDP traffic went into backlog instead of being early-dropped" - log_test "TC $((vlan - 10)): ECN backlog > limit: UDP early-dropped" + log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped" + + stop_traffic + sleep 1 +} + +do_ecn_nodrop_test() +{ + local vlan=$1; shift + local limit=$1; shift + local name="ECN nodrop" + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + sleep 1 + + ecn_test_common "$name" $vlan $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, in nodrop mode, make sure it goes to + # backlog as well. + RET=0 + build_backlog $vlan $((2 * limit)) udp >/dev/null + check_err $? "UDP traffic was early-dropped instead of getting into backlog" + log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped" stop_traffic sleep 1 diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh index af83efe9ccf1..1c36c576613b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -4,6 +4,7 @@ ALL_TESTS=" ping_ipv4 ecn_test + ecn_nodrop_test red_test mc_backlog_test " @@ -50,6 +51,16 @@ ecn_test() uninstall_qdisc } +ecn_nodrop_test() +{ + install_qdisc ecn nodrop + + do_ecn_nodrop_test 10 $BACKLOG1 + do_ecn_nodrop_test 11 $BACKLOG2 + + uninstall_qdisc +} + red_test() { install_qdisc diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh index b2217493a88e..558667ea11ec 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -4,6 +4,7 @@ ALL_TESTS=" ping_ipv4 ecn_test + ecn_nodrop_test red_test mc_backlog_test " @@ -33,6 +34,13 @@ ecn_test() uninstall_qdisc } +ecn_nodrop_test() +{ + install_qdisc ecn nodrop + do_ecn_nodrop_test 10 $BACKLOG + uninstall_qdisc +} + red_test() { install_qdisc diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh index a0795227216e..efd798a85931 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh @@ -8,9 +8,9 @@ tc_flower_get_target() # The driver associates a counter with each tc filter, which means the # number of supported filters is bounded by the number of available # counters. - # Currently, the driver supports 12K (12,288) flow counters and six of + # Currently, the driver supports 30K (30,720) flow counters and six of # these are used for multicast routing. - local target=12282 + local target=30714 if ((! should_fail)); then echo $target diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh new file mode 100755 index 000000000000..20ed98fe5a60 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + default_hw_stats_test + immediate_hw_stats_test + delayed_hw_stats_test + disabled_hw_stats_test +" +NUM_NETIFS=2 + +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +hw_stats_test() +{ + RET=0 + + local name=$1 + local action_hw_stats=$2 + local occ_delta=$3 + local expected_packet_count=$4 + + local orig_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]') + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop $action_hw_stats + check_err $? "Failed to add rule with $name hw_stats" + + local new_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]') + local expected_occ=$((orig_occ + occ_delta)) + [ "$new_occ" == "$expected_occ" ] + check_err $? "Expected occupancy of $expected_occ, got $new_occ" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $swp1mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 101 $expected_packet_count + check_err $? "Did not match incoming packet" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + log_test "$name hw_stats" +} + +default_hw_stats_test() +{ + hw_stats_test "default" "" 2 1 +} + +immediate_hw_stats_test() +{ + hw_stats_test "immediate" "hw_stats immediate" 2 1 +} + +delayed_hw_stats_test() +{ + RET=0 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop hw_stats delayed + check_fail $? "Unexpected success in adding rule with delayed hw_stats" + + log_test "delayed hw_stats" +} + +disabled_hw_stats_test() +{ + hw_stats_test "disabled" "hw_stats disabled" 0 0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + h1mac=$(mac_get $h1) + swp1mac=$(mac_get $swp1) + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +check_tc_action_hw_stats_support + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/lkdtm/.gitignore b/tools/testing/selftests/lkdtm/.gitignore new file mode 100644 index 000000000000..f26212605b6b --- /dev/null +++ b/tools/testing/selftests/lkdtm/.gitignore @@ -0,0 +1,2 @@ +*.sh +!run.sh diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index ecc52d4c034d..91f9aea853b1 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -23,3 +23,4 @@ so_txtime tcp_fastopen_backup_key nettest fin_ack_lat +reuseaddr_ports_exhausted
\ No newline at end of file diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 287ae916ec0b..48063fd69924 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -12,6 +12,7 @@ TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_a TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh TEST_PROGS += fin_ack_lat.sh +TEST_PROGS += reuseaddr_ports_exhausted.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any @@ -22,6 +23,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_FILES += fin_ack_lat TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls +TEST_GEN_FILES += reuseaddr_ports_exhausted KSFT_KHDR_INSTALL := 1 include ../lib.mk diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 60273f1bc7d9..b7616704b55e 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1041,6 +1041,27 @@ ipv6_addr_metric_test() fi log_test $rc 0 "Prefix route with metric on link up" + # verify peer metric added correctly + set -e + run_cmd "$IP -6 addr flush dev dummy2" + run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::1 peer 2001:db8:104::2 metric 260" + set +e + + check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260" + log_test $? 0 "Set metric with peer route on local side" + log_test $? 0 "User specified metric on local address" + check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260" + log_test $? 0 "Set metric with peer route on peer side" + + set -e + run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::1 peer 2001:db8:104::3 metric 261" + set +e + + check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 261" + log_test $? 0 "Modify metric and peer address on local side" + check_route6 "2001:db8:104::3 dev dummy2 proto kernel metric 261" + log_test $? 0 "Modify metric and peer address on peer side" + $IP li del dummy1 $IP li del dummy2 cleanup @@ -1457,13 +1478,20 @@ ipv4_addr_metric_test() run_cmd "$IP addr flush dev dummy2" run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260" - run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261" rc=$? if [ $rc -eq 0 ]; then - check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261" + check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 260" + rc=$? + fi + log_test $rc 0 "Set metric of address with peer route" + + run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.3 metric 261" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.3 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261" rc=$? fi - log_test $rc 0 "Modify metric of address with peer route" + log_test $rc 0 "Modify metric and peer address for peer route" $IP li del dummy1 $IP li del dummy2 diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index a4a7879b3bb9..977fc2b326a2 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -60,6 +60,15 @@ check_tc_chain_support() fi } +check_tc_action_hw_stats_support() +{ + tc actions help 2>&1 | grep -q hw_stats + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing action hw_stats support" + exit 1 + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit 0 diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh new file mode 100755 index 000000000000..0e7693297765 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh @@ -0,0 +1,163 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on +# egress of $swp2, the traffic is acted upon by an action skbedit priority. The +# new priority should be taken into account when classifying traffic on the PRIO +# qdisc at $swp2. The test verifies that for different priority values, the +# traffic ends up in expected PRIO band. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | | PRIO | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_ingress + test_egress +" + +NUM_NETIFS=4 +source lib.sh + +: ${HIT_TIMEOUT:=2000} # ms + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + tc qdisc add dev $swp2 root handle 10: \ + prio bands 8 priomap 7 6 5 4 3 2 1 0 +} + +switch_destroy() +{ + tc qdisc del dev $swp2 root + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +test_skbedit_priority_one() +{ + local locus=$1; shift + local prio=$1; shift + local classid=$1; shift + + RET=0 + + tc filter add $locus handle 101 pref 1 \ + flower action skbedit priority $prio + + local pkt0=$(qdisc_parent_stats_get $swp2 $classid .packets) + $MZ $h1 -t udp "sp=54321,dp=12345" -c 10 -d 20msec -p 100 \ + -a own -b $h2mac -A 192.0.2.1 -B 192.0.2.2 -q + local pkt1 + pkt1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((pkt0 + 10))" \ + qdisc_parent_stats_get $swp2 $classid .packets) + + check_err $? "Expected to get 10 packets on class $classid, but got +$((pkt1 - pkt0))." + log_test "$locus skbedit priority $prio -> classid $classid" + + tc filter del $locus pref 1 +} + +test_ingress() +{ + local prio + + for prio in {0..7}; do + test_skbedit_priority_one "dev $swp1 ingress" \ + $prio 10:$((8 - prio)) + done +} + +test_egress() +{ + local prio + + for prio in {0..7}; do + test_skbedit_priority_one "dev $swp2 egress" \ + $prio 10:$((8 - prio)) + done +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c new file mode 100644 index 000000000000..7b01b7c2ec10 --- /dev/null +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Check if we can fully utilize 4-tuples for connect(). + * + * Rules to bind sockets to the same port when all ephemeral ports are + * exhausted. + * + * 1. if there are TCP_LISTEN sockets on the port, fail to bind. + * 2. if there are sockets without SO_REUSEADDR, fail to bind. + * 3. if SO_REUSEADDR is disabled, fail to bind. + * 4. if SO_REUSEADDR is enabled and SO_REUSEPORT is disabled, + * succeed to bind. + * 5. if SO_REUSEADDR and SO_REUSEPORT are enabled and + * there is no socket having the both options and the same EUID, + * succeed to bind. + * 6. fail to bind. + * + * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> + */ +#include <arpa/inet.h> +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> +#include "../kselftest_harness.h" + +struct reuse_opts { + int reuseaddr[2]; + int reuseport[2]; +}; + +struct reuse_opts unreusable_opts[12] = { + {0, 0, 0, 0}, + {0, 0, 0, 1}, + {0, 0, 1, 0}, + {0, 0, 1, 1}, + {0, 1, 0, 0}, + {0, 1, 0, 1}, + {0, 1, 1, 0}, + {0, 1, 1, 1}, + {1, 0, 0, 0}, + {1, 0, 0, 1}, + {1, 0, 1, 0}, + {1, 0, 1, 1}, +}; + +struct reuse_opts reusable_opts[4] = { + {1, 1, 0, 0}, + {1, 1, 0, 1}, + {1, 1, 1, 0}, + {1, 1, 1, 1}, +}; + +int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport) +{ + struct sockaddr_in local_addr; + int len = sizeof(local_addr); + int fd, ret; + + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_NE(-1, fd) TH_LOG("failed to open socket."); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(int)); + ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEADDR."); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &reuseport, sizeof(int)); + ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEPORT."); + + local_addr.sin_family = AF_INET; + local_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + local_addr.sin_port = 0; + + if (bind(fd, (struct sockaddr *)&local_addr, len) == -1) { + close(fd); + return -1; + } + + return fd; +} + +TEST(reuseaddr_ports_exhausted_unreusable) +{ + struct reuse_opts *opts; + int i, j, fd[2]; + + for (i = 0; i < 12; i++) { + opts = &unreusable_opts[i]; + + for (j = 0; j < 2; j++) + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind."); + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST(reuseaddr_ports_exhausted_reusable_same_euid) +{ + struct reuse_opts *opts; + int i, j, fd[2]; + + for (i = 0; i < 4; i++) { + opts = &reusable_opts[i]; + + for (j = 0; j < 2; j++) + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + + if (opts->reuseport[0] && opts->reuseport[1]) { + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened."); + } else { + EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations."); + } + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST(reuseaddr_ports_exhausted_reusable_different_euid) +{ + struct reuse_opts *opts; + int i, j, ret, fd[2]; + uid_t euid[2] = {10, 20}; + + for (i = 0; i < 4; i++) { + opts = &reusable_opts[i]; + + for (j = 0; j < 2; j++) { + ret = seteuid(euid[j]); + ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: %d.", euid[j]); + + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ret = seteuid(0); + ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: 0."); + } + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind because one socket can be bound in each euid."); + + if (fd[1] != -1) { + ret = listen(fd[0], 5); + ASSERT_EQ(0, ret) TH_LOG("failed to listen."); + + ret = listen(fd[1], 5); + EXPECT_EQ(-1, ret) TH_LOG("should fail to listen because only one uid reserves the port in TCP_LISTEN."); + } + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh new file mode 100755 index 000000000000..20e3a2913d06 --- /dev/null +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run tests when all ephemeral ports are exhausted. +# +# Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> + +set +x +set -e + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up + ip netns exec "${NETNS}" \ + sysctl -w net.ipv4.ip_local_port_range="32768 32768" \ + > /dev/null 2>&1 + ip netns exec "${NETNS}" \ + sysctl -w net.ipv4.ip_autobind_reuse=1 > /dev/null 2>&1 +} + +cleanup() { + ip netns del "${NETNS}" +} + +trap cleanup EXIT +setup + +do_test() { + ip netns exec "${NETNS}" ./reuseaddr_ports_exhausted +} + +do_test +echo "tests done" diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index 7e386be47120..011b0da6b033 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -41,6 +41,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/epoll.h> #include <sys/ioctl.h> #include <sys/select.h> #include <sys/socket.h> @@ -49,6 +50,10 @@ #include <time.h> #include <unistd.h> +#define NSEC_PER_USEC 1000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000LL + /* command line parameters */ static int cfg_proto = SOCK_STREAM; static int cfg_ipproto = IPPROTO_TCP; @@ -61,12 +66,16 @@ static int cfg_delay_snd; static int cfg_delay_ack; static bool cfg_show_payload; static bool cfg_do_pktinfo; +static bool cfg_busy_poll; +static int cfg_sleep_usec = 50 * 1000; static bool cfg_loop_nodata; -static bool cfg_no_delay; static bool cfg_use_cmsg; static bool cfg_use_pf_packet; +static bool cfg_use_epoll; +static bool cfg_epollet; static bool cfg_do_listen; static uint16_t dest_port = 9000; +static bool cfg_print_nsec; static struct sockaddr_in daddr; static struct sockaddr_in6 daddr6; @@ -75,11 +84,48 @@ static struct timespec ts_usr; static int saved_tskey = -1; static int saved_tskey_type = -1; +struct timing_event { + int64_t min; + int64_t max; + int64_t total; + int count; +}; + +static struct timing_event usr_enq; +static struct timing_event usr_snd; +static struct timing_event usr_ack; + static bool test_failed; +static int64_t timespec_to_ns64(struct timespec *ts) +{ + return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec; +} + static int64_t timespec_to_us64(struct timespec *ts) { - return ts->tv_sec * 1000 * 1000 + ts->tv_nsec / 1000; + return ts->tv_sec * USEC_PER_SEC + ts->tv_nsec / NSEC_PER_USEC; +} + +static void init_timing_event(struct timing_event *te) +{ + te->min = INT64_MAX; + te->max = 0; + te->total = 0; + te->count = 0; +} + +static void add_timing_event(struct timing_event *te, + struct timespec *t_start, struct timespec *t_end) +{ + int64_t ts_delta = timespec_to_ns64(t_end) - timespec_to_ns64(t_start); + + te->count++; + if (ts_delta < te->min) + te->min = ts_delta; + if (ts_delta > te->max) + te->max = ts_delta; + te->total += ts_delta; } static void validate_key(int tskey, int tstype) @@ -113,25 +159,43 @@ static void validate_timestamp(struct timespec *cur, int min_delay) start64 = timespec_to_us64(&ts_usr); if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) { - fprintf(stderr, "ERROR: delay %lu expected between %d and %d\n", + fprintf(stderr, "ERROR: %lu us expected between %d and %d\n", cur64 - start64, min_delay, max_delay); test_failed = true; } } +static void __print_ts_delta_formatted(int64_t ts_delta) +{ + if (cfg_print_nsec) + fprintf(stderr, "%lu ns", ts_delta); + else + fprintf(stderr, "%lu us", ts_delta / NSEC_PER_USEC); +} + static void __print_timestamp(const char *name, struct timespec *cur, uint32_t key, int payload_len) { + int64_t ts_delta; + if (!(cur->tv_sec | cur->tv_nsec)) return; - fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)", - name, cur->tv_sec, cur->tv_nsec / 1000, - key, payload_len); - - if (cur != &ts_usr) - fprintf(stderr, " (USR %+" PRId64 " us)", - timespec_to_us64(cur) - timespec_to_us64(&ts_usr)); + if (cfg_print_nsec) + fprintf(stderr, " %s: %lu s %lu ns (seq=%u, len=%u)", + name, cur->tv_sec, cur->tv_nsec, + key, payload_len); + else + fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)", + name, cur->tv_sec, cur->tv_nsec / NSEC_PER_USEC, + key, payload_len); + + if (cur != &ts_usr) { + ts_delta = timespec_to_ns64(cur) - timespec_to_ns64(&ts_usr); + fprintf(stderr, " (USR +"); + __print_ts_delta_formatted(ts_delta); + fprintf(stderr, ")"); + } fprintf(stderr, "\n"); } @@ -155,14 +219,17 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, case SCM_TSTAMP_SCHED: tsname = " ENQ"; validate_timestamp(&tss->ts[0], 0); + add_timing_event(&usr_enq, &ts_usr, &tss->ts[0]); break; case SCM_TSTAMP_SND: tsname = " SND"; validate_timestamp(&tss->ts[0], cfg_delay_snd); + add_timing_event(&usr_snd, &ts_usr, &tss->ts[0]); break; case SCM_TSTAMP_ACK: tsname = " ACK"; validate_timestamp(&tss->ts[0], cfg_delay_ack); + add_timing_event(&usr_ack, &ts_usr, &tss->ts[0]); break; default: error(1, 0, "unknown timestamp type: %u", @@ -171,6 +238,21 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, __print_timestamp(tsname, &tss->ts[0], tskey, payload_len); } +static void print_timing_event(char *name, struct timing_event *te) +{ + if (!te->count) + return; + + fprintf(stderr, " %s: count=%d", name, te->count); + fprintf(stderr, ", avg="); + __print_ts_delta_formatted((int64_t)(te->total / te->count)); + fprintf(stderr, ", min="); + __print_ts_delta_formatted(te->min); + fprintf(stderr, ", max="); + __print_ts_delta_formatted(te->max); + fprintf(stderr, "\n"); +} + /* TODO: convert to check_and_print payload once API is stable */ static void print_payload(char *data, int len) { @@ -198,6 +280,17 @@ static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr) daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); } +static void __epoll(int epfd) +{ + struct epoll_event events; + int ret; + + memset(&events, 0, sizeof(events)); + ret = epoll_wait(epfd, &events, 1, cfg_poll_timeout); + if (ret != 1) + error(1, errno, "epoll_wait"); +} + static void __poll(int fd) { struct pollfd pollfd; @@ -391,7 +484,11 @@ static void do_test(int family, unsigned int report_opt) struct msghdr msg; struct iovec iov; char *buf; - int fd, i, val = 1, total_len; + int fd, i, val = 1, total_len, epfd = 0; + + init_timing_event(&usr_enq); + init_timing_event(&usr_snd); + init_timing_event(&usr_ack); total_len = cfg_payload_len; if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { @@ -418,6 +515,20 @@ static void do_test(int family, unsigned int report_opt) if (fd < 0) error(1, errno, "socket"); + if (cfg_use_epoll) { + struct epoll_event ev; + + memset(&ev, 0, sizeof(ev)); + ev.data.fd = fd; + if (cfg_epollet) + ev.events |= EPOLLET; + epfd = epoll_create(1); + if (epfd <= 0) + error(1, errno, "epoll_create"); + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev)) + error(1, errno, "epoll_ctl"); + } + /* reset expected key on each new socket */ saved_tskey = -1; @@ -525,19 +636,28 @@ static void do_test(int family, unsigned int report_opt) error(1, errno, "send"); /* wait for all errors to be queued, else ACKs arrive OOO */ - if (!cfg_no_delay) - usleep(50 * 1000); + if (cfg_sleep_usec) + usleep(cfg_sleep_usec); - __poll(fd); + if (!cfg_busy_poll) { + if (cfg_use_epoll) + __epoll(epfd); + else + __poll(fd); + } while (!recv_errmsg(fd)) {} } + print_timing_event("USR-ENQ", &usr_enq); + print_timing_event("USR-SND", &usr_snd); + print_timing_event("USR-ACK", &usr_ack); + if (close(fd)) error(1, errno, "close"); free(buf); - usleep(100 * 1000); + usleep(100 * NSEC_PER_USEC); } static void __attribute__((noreturn)) usage(const char *filepath) @@ -547,18 +667,22 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -4: only IPv4\n" " -6: only IPv6\n" " -h: show this message\n" + " -b: busy poll to read from error queue\n" " -c N: number of packets for each test\n" " -C: use cmsg to set tstamp recording options\n" - " -D: no delay between packets\n" - " -F: poll() waits forever for an event\n" + " -e: use level-triggered epoll() instead of poll()\n" + " -E: use event-triggered epoll() instead of poll()\n" + " -F: poll()/epoll() waits forever for an event\n" " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" " -L listen on hostname and port\n" " -n: set no-payload option\n" + " -N: print timestamps and durations in nsec (instead of usec)\n" " -p N: connect to port N\n" " -P: use PF_PACKET\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" + " -S N: usec to sleep before reading error queue\n" " -u: use udp\n" " -v: validate SND delay (usec)\n" " -V: validate ACK delay (usec)\n" @@ -572,7 +696,8 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; int c; - while ((c = getopt(argc, argv, "46c:CDFhIl:Lnp:PrRuv:V:x")) != -1) { + while ((c = getopt(argc, argv, + "46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -580,15 +705,21 @@ static void parse_opt(int argc, char **argv) case '6': do_ipv4 = 0; break; + case 'b': + cfg_busy_poll = true; + break; case 'c': cfg_num_pkts = strtoul(optarg, NULL, 10); break; case 'C': cfg_use_cmsg = true; break; - case 'D': - cfg_no_delay = true; + case 'e': + cfg_use_epoll = true; break; + case 'E': + cfg_use_epoll = true; + cfg_epollet = true; case 'F': cfg_poll_timeout = -1; break; @@ -604,6 +735,9 @@ static void parse_opt(int argc, char **argv) case 'n': cfg_loop_nodata = true; break; + case 'N': + cfg_print_nsec = true; + break; case 'p': dest_port = strtoul(optarg, NULL, 10); break; @@ -623,6 +757,9 @@ static void parse_opt(int argc, char **argv) cfg_proto = SOCK_RAW; cfg_ipproto = IPPROTO_RAW; break; + case 'S': + cfg_sleep_usec = strtoul(optarg, NULL, 10); + break; case 'u': proto_count++; cfg_proto = SOCK_DGRAM; @@ -653,6 +790,8 @@ static void parse_opt(int argc, char **argv) error(1, 0, "pass -P, -r, -R or -u, not multiple"); if (cfg_do_pktinfo && cfg_use_pf_packet) error(1, 0, "cannot ask for pktinfo over pf_packet"); + if (cfg_busy_poll && cfg_use_epoll) + error(1, 0, "pass epoll or busy_poll, not both"); if (optind != argc - 1) error(1, 0, "missing required hostname argument"); diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.sh b/tools/testing/selftests/networking/timestamping/txtimestamp.sh index df0d86ca72b7..70a8cda7fd53 100755 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.sh +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.sh @@ -43,15 +43,40 @@ run_test_tcpudpraw() { } run_test_all() { + setup run_test_tcpudpraw # setsockopt run_test_tcpudpraw -C # cmsg run_test_tcpudpraw -n # timestamp w/o data + echo "OK. All tests passed" +} + +run_test_one() { + setup + ./txtimestamp $@ +} + +usage() { + echo "Usage: $0 [ -r | --run ] <txtimestamp args> | [ -h | --help ]" + echo " (no args) Run all tests" + echo " -r|--run Run an individual test with arguments" + echo " -h|--help Help" +} + +main() { + if [[ $# -eq 0 ]]; then + run_test_all + else + if [[ "$1" = "-r" || "$1" == "--run" ]]; then + shift + run_test_one $@ + else + usage + fi + fi } if [[ "$(ip netns identify)" == "root" ]]; then ../../net/in_netns.sh $0 $@ else - setup - run_test_all - echo "OK. All tests passed" + main $@ fi diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 3a779c084d96..39559d723c41 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -2,4 +2,5 @@ pidfd_open_test pidfd_poll_test pidfd_test pidfd_wait +pidfd_fdinfo_test pidfd_getfd_test diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index c812faa29f36..c33a7aac27ff 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -58,6 +58,7 @@ CONFIG_NET_IFE_SKBMARK=m CONFIG_NET_IFE_SKBPRIO=m CONFIG_NET_IFE_SKBTCINDEX=m CONFIG_NET_SCH_FIFO=y +CONFIG_NET_SCH_ETS=m # ## Network testing diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json new file mode 100644 index 000000000000..0703a2a255eb --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json @@ -0,0 +1,185 @@ +[ + { + "id": "8b6e", + "name": "Create RED with no flags", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "342e", + "name": "Create RED with adaptive flag", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red adaptive limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb adaptive $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "2d4b", + "name": "Create RED with ECN flag", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "650f", + "name": "Create RED with flags ECN, adaptive", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn adaptive limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn adaptive $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "5f15", + "name": "Create RED with flags ECN, harddrop", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "53e8", + "name": "Create RED with flags ECN, nodrop", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn nodrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn nodrop $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "d091", + "name": "Fail to create RED with only nodrop flag", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red nodrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "af8e", + "name": "Create RED with flags ECN, nodrop, harddrop", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop nodrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop nodrop $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + } +] diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index d65a0faa46d8..eda7b624eab8 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -742,9 +742,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) guest_enter_irqoff(); if (has_vhe()) { - kvm_arm_vhe_guest_enter(); ret = kvm_vcpu_run_vhe(vcpu); - kvm_arm_vhe_guest_exit(); } else { ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); } diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h index 204d210d01c2..cc94ccc68821 100644 --- a/virt/kvm/arm/trace.h +++ b/virt/kvm/arm/trace.h @@ -4,6 +4,7 @@ #include <kvm/arm_arch_timer.h> #include <linux/tracepoint.h> +#include <asm/kvm_arm.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm |