diff options
1267 files changed, 14671 insertions, 9308 deletions
@@ -71,6 +71,9 @@ Chao Yu <chao@kernel.org> <chao2.yu@samsung.com> Chao Yu <chao@kernel.org> <yuchao0@huawei.com> Chris Chiu <chris.chiu@canonical.com> <chiu@endlessm.com> Chris Chiu <chris.chiu@canonical.com> <chiu@endlessos.org> +Christian Borntraeger <borntraeger@linux.ibm.com> <borntraeger@de.ibm.com> +Christian Borntraeger <borntraeger@linux.ibm.com> <cborntra@de.ibm.com> +Christian Borntraeger <borntraeger@linux.ibm.com> <borntrae@de.ibm.com> Christophe Ricard <christophe.ricard@gmail.com> Christoph Hellwig <hch@lst.de> Colin Ian King <colin.king@intel.com> <colin.king@canonical.com> @@ -123,6 +126,8 @@ Greg Kroah-Hartman <gregkh@suse.de> Greg Kroah-Hartman <greg@kroah.com> Greg Kurz <groug@kaod.org> <gkurz@linux.vnet.ibm.com> Gregory CLEMENT <gregory.clement@bootlin.com> <gregory.clement@free-electrons.com> +Guo Ren <guoren@kernel.org> <guoren@linux.alibaba.com> +Guo Ren <guoren@kernel.org> <ren_guo@c-sky.com> Gustavo Padovan <gustavo@las.ic.unicamp.br> Gustavo Padovan <padovan@profusion.mobi> Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org> diff --git a/Documentation/admin-guide/blockdev/drbd/figures.rst b/Documentation/admin-guide/blockdev/drbd/figures.rst index bd9a4901fe46..9f73253ea353 100644 --- a/Documentation/admin-guide/blockdev/drbd/figures.rst +++ b/Documentation/admin-guide/blockdev/drbd/figures.rst @@ -25,6 +25,6 @@ Sub graphs of DRBD's state transitions :alt: disk-states-8.dot :align: center -.. kernel-figure:: node-states-8.dot - :alt: node-states-8.dot +.. kernel-figure:: peer-states-8.dot + :alt: peer-states-8.dot :align: center diff --git a/Documentation/admin-guide/blockdev/drbd/node-states-8.dot b/Documentation/admin-guide/blockdev/drbd/peer-states-8.dot index bfa54e1f8016..6dc3954954d6 100644 --- a/Documentation/admin-guide/blockdev/drbd/node-states-8.dot +++ b/Documentation/admin-guide/blockdev/drbd/peer-states-8.dot @@ -1,8 +1,3 @@ -digraph node_states { - Secondary -> Primary [ label = "ioctl_set_state()" ] - Primary -> Secondary [ label = "ioctl_set_state()" ] -} - digraph peer_states { Secondary -> Primary [ label = "recv state packet" ] Primary -> Secondary [ label = "recv state packet" ] diff --git a/Documentation/admin-guide/laptops/thinkpad-acpi.rst b/Documentation/admin-guide/laptops/thinkpad-acpi.rst index 6721a80a2d4f..475eb0e81e4a 100644 --- a/Documentation/admin-guide/laptops/thinkpad-acpi.rst +++ b/Documentation/admin-guide/laptops/thinkpad-acpi.rst @@ -1520,15 +1520,15 @@ This sysfs attribute controls the keyboard "face" that will be shown on the Lenovo X1 Carbon 2nd gen (2014)'s adaptive keyboard. The value can be read and set. -- 1 = Home mode -- 2 = Web-browser mode -- 3 = Web-conference mode -- 4 = Function mode -- 5 = Layflat mode +- 0 = Home mode +- 1 = Web-browser mode +- 2 = Web-conference mode +- 3 = Function mode +- 4 = Layflat mode For more details about which buttons will appear depending on the mode, please review the laptop's user guide: -http://www.lenovo.com/shop/americas/content/user_guides/x1carbon_2_ug_en.pdf +https://download.lenovo.com/ibmdl/pub/pc/pccbbs/mobiles_pdf/x1carbon_2_ug_en.pdf Battery charge control ---------------------- diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 426162009ce9..0e486f41185e 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1099,7 +1099,7 @@ task_delayacct =============== Enables/disables task delay accounting (see -:doc:`accounting/delay-accounting.rst`). Enabling this feature incurs +Documentation/accounting/delay-accounting.rst. Enabling this feature incurs a small amount of overhead in the scheduler but is useful for debugging and performance tuning. It is required by some tools such as iotop. diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst index 8323c79d321b..9485a5a2e2e9 100644 --- a/Documentation/arm/marvell.rst +++ b/Documentation/arm/marvell.rst @@ -104,6 +104,8 @@ Discovery family Not supported by the Linux kernel. + Homepage: + https://web.archive.org/web/20110924171043/http://www.marvell.com/embedded-processors/discovery-innovation/ Core: Feroceon 88fr571-vd ARMv5 compatible @@ -120,6 +122,7 @@ EBU Armada family - 88F6707 - 88F6W11 + - Product infos: https://web.archive.org/web/20141002083258/http://www.marvell.com/embedded-processors/armada-370/ - Product Brief: https://web.archive.org/web/20121115063038/http://www.marvell.com/embedded-processors/armada-300/assets/Marvell_ARMADA_370_SoC.pdf - Hardware Spec: https://web.archive.org/web/20140617183747/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-datasheet.pdf - Functional Spec: https://web.archive.org/web/20140617183701/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-FunctionalSpec-datasheet.pdf @@ -127,9 +130,29 @@ EBU Armada family Core: Sheeva ARMv7 compatible PJ4B + Armada XP Flavors: + - MV78230 + - MV78260 + - MV78460 + + NOTE: + not to be confused with the non-SMP 78xx0 SoCs + + - Product infos: https://web.archive.org/web/20150101215721/http://www.marvell.com/embedded-processors/armada-xp/ + - Product Brief: https://web.archive.org/web/20121021173528/http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf + - Functional Spec: https://web.archive.org/web/20180829171131/http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf + - Hardware Specs: + - https://web.archive.org/web/20141127013651/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF + - https://web.archive.org/web/20141222000224/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF + - https://web.archive.org/web/20141222000230/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF + + Core: + Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP + Armada 375 Flavors: - 88F6720 + - Product infos: https://web.archive.org/web/20140108032402/http://www.marvell.com/embedded-processors/armada-375/ - Product Brief: https://web.archive.org/web/20131216023516/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA_375_SoC-01_product_brief.pdf Core: @@ -162,29 +185,6 @@ EBU Armada family Core: ARM Cortex-A9 - Armada XP Flavors: - - MV78230 - - MV78260 - - MV78460 - - NOTE: - not to be confused with the non-SMP 78xx0 SoCs - - Product Brief: - https://web.archive.org/web/20121021173528/http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf - - Functional Spec: - https://web.archive.org/web/20180829171131/http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf - - - Hardware Specs: - - - https://web.archive.org/web/20141127013651/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF - - https://web.archive.org/web/20141222000224/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF - - https://web.archive.org/web/20141222000230/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF - - Core: - Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP - Linux kernel mach directory: arch/arm/mach-mvebu Linux kernel plat directory: @@ -436,7 +436,7 @@ Berlin family (Multimedia Solutions) - Flavors: - 88DE3010, Armada 1000 (no Linux support) - Core: Marvell PJ1 (ARMv5TE), Dual-core - - Product Brief: http://www.marvell.com.cn/digital-entertainment/assets/armada_1000_pb.pdf + - Product Brief: https://web.archive.org/web/20131103162620/http://www.marvell.com/digital-entertainment/assets/armada_1000_pb.pdf - 88DE3005, Armada 1500 Mini - Design name: BG2CD - Core: ARM Cortex-A9, PL310 L2CC diff --git a/Documentation/arm64/pointer-authentication.rst b/Documentation/arm64/pointer-authentication.rst index f127666ea3a8..e5dad2e40aa8 100644 --- a/Documentation/arm64/pointer-authentication.rst +++ b/Documentation/arm64/pointer-authentication.rst @@ -53,11 +53,10 @@ The number of bits that the PAC occupies in a pointer is 55 minus the virtual address size configured by the kernel. For example, with a virtual address size of 48, the PAC is 7 bits wide. -Recent versions of GCC can compile code with APIAKey-based return -address protection when passed the -msign-return-address option. This -uses instructions in the HINT space (unless -march=armv8.3-a or higher -is also passed), and such code can run on systems without the pointer -authentication extension. +When ARM64_PTR_AUTH_KERNEL is selected, the kernel will be compiled +with HINT space pointer authentication instructions protecting +function returns. Kernels built with this option will work on hardware +with or without pointer authentication support. In addition to exec(), keys can also be reinitialized to random values using the PR_PAC_RESET_KEYS prctl. A bitmask of PR_PAC_APIAKEY, diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst index 37f273a7e8b6..610450f59e05 100644 --- a/Documentation/bpf/index.rst +++ b/Documentation/bpf/index.rst @@ -15,7 +15,7 @@ that goes into great technical depth about the BPF Architecture. libbpf ====== -Documentation/bpf/libbpf/libbpf.rst is a userspace library for loading and interacting with bpf programs. +Documentation/bpf/libbpf/index.rst is a userspace library for loading and interacting with bpf programs. BPF Type Format (BTF) ===================== diff --git a/Documentation/conf.py b/Documentation/conf.py index 17f7cee56987..76e5eb5cb62b 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -249,11 +249,16 @@ except ImportError: html_static_path = ['sphinx-static'] -html_context = { - 'css_files': [ - '_static/theme_overrides.css', - ], -} +html_css_files = [ + 'theme_overrides.css', +] + +if major <= 1 and minor < 8: + html_context = { + 'css_files': [ + '_static/theme_overrides.css', + ], + } # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied diff --git a/Documentation/cpu-freq/core.rst b/Documentation/cpu-freq/core.rst index 33cb90bd1d8f..4ceef8e7217c 100644 --- a/Documentation/cpu-freq/core.rst +++ b/Documentation/cpu-freq/core.rst @@ -73,12 +73,12 @@ CPUFREQ_POSTCHANGE. The third argument is a struct cpufreq_freqs with the following values: -===== =========================== -cpu number of the affected CPU +====== ====================================== +policy a pointer to the struct cpufreq_policy old old frequency new new frequency flags flags of the cpufreq driver -===== =========================== +====== ====================================== 3. CPUFreq Table Generation with Operating Performance Point (OPP) ================================================================== diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml index 29b9447f3b84..fe0c89edf7c1 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml @@ -17,9 +17,10 @@ properties: oneOf: - enum: - fsl,imx7ulp-lpi2c - - fsl,imx8qm-lpi2c - items: - - const: fsl,imx8qxp-lpi2c + - enum: + - fsl,imx8qxp-lpi2c + - fsl,imx8qm-lpi2c - const: fsl,imx7ulp-lpi2c reg: diff --git a/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml b/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml index c65921e66dc1..81c87295912c 100644 --- a/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml @@ -136,7 +136,7 @@ examples: samsung,syscon-phandle = <&pmu_system_controller>; /* NTC thermistor is a hwmon device */ - ncp15wb473 { + thermistor { compatible = "murata,ncp15wb473"; pullup-uv = <1800000>; pullup-ohm = <47000>; diff --git a/Documentation/devicetree/bindings/input/gpio-keys.yaml b/Documentation/devicetree/bindings/input/gpio-keys.yaml index 060a309ff8e7..dbe7ecc19ccb 100644 --- a/Documentation/devicetree/bindings/input/gpio-keys.yaml +++ b/Documentation/devicetree/bindings/input/gpio-keys.yaml @@ -142,7 +142,7 @@ examples: down { label = "GPIO Key DOWN"; linux,code = <108>; - interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + interrupts = <1 IRQ_TYPE_EDGE_FALLING>; }; }; diff --git a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml index 877183cf4278..1ef849dc74d7 100644 --- a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml +++ b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml @@ -79,6 +79,8 @@ properties: properties: data-lanes: + description: + Note that 'fsl,imx7-mipi-csi2' only supports up to 2 data lines. items: minItems: 1 maxItems: 4 @@ -91,18 +93,6 @@ properties: required: - data-lanes - allOf: - - if: - properties: - compatible: - contains: - const: fsl,imx7-mipi-csi2 - then: - properties: - data-lanes: - items: - maxItems: 2 - port@1: $ref: /schemas/graph.yaml#/properties/port description: diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 2766fe45bb98..ee42328a109d 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -91,6 +91,14 @@ properties: compensate for the board being designed with the lanes swapped. + enet-phy-lane-no-swap: + $ref: /schemas/types.yaml#/definitions/flag + description: + If set, indicates that PHY will disable swap of the + TX/RX lanes. This property allows the PHY to work correcly after + e.g. wrong bootstrap configuration caused by issues in PCB + layout design. + eee-broken-100tx: $ref: /schemas/types.yaml#/definitions/flag description: diff --git a/Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml b/Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml index 04d5654efb38..79906519c652 100644 --- a/Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml +++ b/Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml @@ -29,7 +29,7 @@ properties: - PHY_TYPE_PCIE - PHY_TYPE_SATA - PHY_TYPE_SGMII - - PHY_TYPE_USB + - PHY_TYPE_USB3 - description: The PHY instance minimum: 0 maximum: 1 # for DP, SATA or USB diff --git a/Documentation/devicetree/bindings/power/supply/bq25980.yaml b/Documentation/devicetree/bindings/power/supply/bq25980.yaml index 06eca6667f67..8367a1fd4057 100644 --- a/Documentation/devicetree/bindings/power/supply/bq25980.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq25980.yaml @@ -105,7 +105,7 @@ examples: reg = <0x65>; interrupt-parent = <&gpio1>; interrupts = <16 IRQ_TYPE_EDGE_FALLING>; - ti,watchdog-timer = <0>; + ti,watchdog-timeout-ms = <0>; ti,sc-ocp-limit-microamp = <2000000>; ti,sc-ovp-limit-microvolt = <17800000>; monitored-battery = <&bat>; diff --git a/Documentation/devicetree/bindings/sound/wlf,wm8962.yaml b/Documentation/devicetree/bindings/sound/wlf,wm8962.yaml index 0e6249d7c133..5e172e9462b9 100644 --- a/Documentation/devicetree/bindings/sound/wlf,wm8962.yaml +++ b/Documentation/devicetree/bindings/sound/wlf,wm8962.yaml @@ -19,6 +19,9 @@ properties: clocks: maxItems: 1 + interrupts: + maxItems: 1 + "#sound-dai-cells": const: 0 diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml index 7f987e79337c..52a78a2e362e 100644 --- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml +++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml @@ -33,6 +33,7 @@ properties: - rockchip,rk3328-spi - rockchip,rk3368-spi - rockchip,rk3399-spi + - rockchip,rk3568-spi - rockchip,rv1126-spi - const: rockchip,rk3066-spi diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst index ec3e71f56009..e445cb146efe 100644 --- a/Documentation/doc-guide/sphinx.rst +++ b/Documentation/doc-guide/sphinx.rst @@ -27,7 +27,7 @@ Sphinx Install ============== The ReST markups currently used by the Documentation/ files are meant to be -built with ``Sphinx`` version 1.3 or higher. +built with ``Sphinx`` version 1.7 or higher. There's a script that checks for the Sphinx requirements. Please see :ref:`sphinx-pre-install` for further details. @@ -43,10 +43,6 @@ or ``virtualenv``, depending on how your distribution packaged Python 3. .. note:: - #) Sphinx versions below 1.5 don't work properly with Python's - docutils version 0.13.1 or higher. So, if you're willing to use - those versions, you should run ``pip install 'docutils==0.12'``. - #) It is recommended to use the RTD theme for html output. Depending on the Sphinx version, it should be installed separately, with ``pip install sphinx_rtd_theme``. @@ -55,13 +51,13 @@ or ``virtualenv``, depending on how your distribution packaged Python 3. those expressions are written using LaTeX notation. It needs texlive installed with amsfonts and amsmath in order to evaluate them. -In summary, if you want to install Sphinx version 1.7.9, you should do:: +In summary, if you want to install Sphinx version 2.4.4, you should do:: - $ virtualenv sphinx_1.7.9 - $ . sphinx_1.7.9/bin/activate - (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt + $ virtualenv sphinx_2.4.4 + $ . sphinx_2.4.4/bin/activate + (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt -After running ``. sphinx_1.7.9/bin/activate``, the prompt will change, +After running ``. sphinx_2.4.4/bin/activate``, the prompt will change, in order to indicate that you're using the new environment. If you open a new shell, you need to rerun this command to enter again at the virtual environment before building the documentation. @@ -81,7 +77,7 @@ output. PDF and LaTeX builds -------------------- -Such builds are currently supported only with Sphinx versions 1.4 and higher. +Such builds are currently supported only with Sphinx versions 2.4 and higher. For PDF and LaTeX output, you'll also need ``XeLaTeX`` version 3.14159265. @@ -104,8 +100,8 @@ command line options for your distro:: You should run: sudo dnf install -y texlive-luatex85 - /usr/bin/virtualenv sphinx_1.7.9 - . sphinx_1.7.9/bin/activate + /usr/bin/virtualenv sphinx_2.4.4 + . sphinx_2.4.4/bin/activate pip install -r Documentation/sphinx/requirements.txt Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468. diff --git a/Documentation/filesystems/autofs.rst b/Documentation/filesystems/autofs.rst index 681c6a492bc0..4f490278d22f 100644 --- a/Documentation/filesystems/autofs.rst +++ b/Documentation/filesystems/autofs.rst @@ -35,7 +35,7 @@ This document describes only the kernel module and the interactions required with any user-space program. Subsequent text refers to this as the "automount daemon" or simply "the daemon". -"autofs" is a Linux kernel module with provides the "autofs" +"autofs" is a Linux kernel module which provides the "autofs" filesystem type. Several "autofs" filesystems can be mounted and they can each be managed separately, or all managed by the same daemon. diff --git a/Documentation/filesystems/cifs/ksmbd.rst b/Documentation/filesystems/cifs/ksmbd.rst index a1326157d53f..b0d354fd8066 100644 --- a/Documentation/filesystems/cifs/ksmbd.rst +++ b/Documentation/filesystems/cifs/ksmbd.rst @@ -50,11 +50,11 @@ ksmbd.mountd (user space daemon) -------------------------------- ksmbd.mountd is userspace process to, transfer user account and password that -are registered using ksmbd.adduser(part of utils for user space). Further it +are registered using ksmbd.adduser (part of utils for user space). Further it allows sharing information parameters that parsed from smb.conf to ksmbd in kernel. For the execution part it has a daemon which is continuously running and connected to the kernel interface using netlink socket, it waits for the -requests(dcerpc and share/user info). It handles RPC calls (at a minimum few +requests (dcerpc and share/user info). It handles RPC calls (at a minimum few dozen) that are most important for file server from NetShareEnum and NetServerGetInfo. Complete DCE/RPC response is prepared from the user space and passed over to the associated kernel thread for the client. @@ -154,11 +154,11 @@ Each layer 1. Enable all component prints # sudo ksmbd.control -d "all" -2. Enable one of components(smb, auth, vfs, oplock, ipc, conn, rdma) +2. Enable one of components (smb, auth, vfs, oplock, ipc, conn, rdma) # sudo ksmbd.control -d "smb" -3. Show what prints are enable. - # cat/sys/class/ksmbd-control/debug +3. Show what prints are enabled. + # cat /sys/class/ksmbd-control/debug [smb] auth vfs oplock ipc conn [rdma] 4. Disable prints: diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst index bb68d39f03b7..375baca7edcd 100644 --- a/Documentation/filesystems/netfs_library.rst +++ b/Documentation/filesystems/netfs_library.rst @@ -1,7 +1,7 @@ .. SPDX-License-Identifier: GPL-2.0 ================================= -NETWORK FILESYSTEM HELPER LIBRARY +Network Filesystem Helper Library ================================= .. Contents: @@ -37,22 +37,22 @@ into a common call framework. The following services are provided: - * Handles transparent huge pages (THPs). + * Handle folios that span multiple pages. - * Insulates the netfs from VM interface changes. + * Insulate the netfs from VM interface changes. - * Allows the netfs to arbitrarily split reads up into pieces, even ones that - don't match page sizes or page alignments and that may cross pages. + * Allow the netfs to arbitrarily split reads up into pieces, even ones that + don't match folio sizes or folio alignments and that may cross folios. - * Allows the netfs to expand a readahead request in both directions to meet - its needs. + * Allow the netfs to expand a readahead request in both directions to meet its + needs. - * Allows the netfs to partially fulfil a read, which will then be resubmitted. + * Allow the netfs to partially fulfil a read, which will then be resubmitted. - * Handles local caching, allowing cached data and server-read data to be + * Handle local caching, allowing cached data and server-read data to be interleaved for a single request. - * Handles clearing of bufferage that aren't on the server. + * Handle clearing of bufferage that aren't on the server. * Handle retrying of reads that failed, switching reads from the cache to the server as necessary. @@ -70,22 +70,22 @@ Read Helper Functions Three read helpers are provided:: - * void netfs_readahead(struct readahead_control *ractl, - const struct netfs_read_request_ops *ops, - void *netfs_priv);`` - * int netfs_readpage(struct file *file, - struct page *page, - const struct netfs_read_request_ops *ops, - void *netfs_priv); - * int netfs_write_begin(struct file *file, - struct address_space *mapping, - loff_t pos, - unsigned int len, - unsigned int flags, - struct page **_page, - void **_fsdata, - const struct netfs_read_request_ops *ops, - void *netfs_priv); + void netfs_readahead(struct readahead_control *ractl, + const struct netfs_read_request_ops *ops, + void *netfs_priv); + int netfs_readpage(struct file *file, + struct folio *folio, + const struct netfs_read_request_ops *ops, + void *netfs_priv); + int netfs_write_begin(struct file *file, + struct address_space *mapping, + loff_t pos, + unsigned int len, + unsigned int flags, + struct folio **_folio, + void **_fsdata, + const struct netfs_read_request_ops *ops, + void *netfs_priv); Each corresponds to a VM operation, with the addition of a couple of parameters for the use of the read helpers: @@ -103,8 +103,8 @@ Both of these values will be stored into the read request structure. For ->readahead() and ->readpage(), the network filesystem should just jump into the corresponding read helper; whereas for ->write_begin(), it may be a little more complicated as the network filesystem might want to flush -conflicting writes or track dirty data and needs to put the acquired page if an -error occurs after calling the helper. +conflicting writes or track dirty data and needs to put the acquired folio if +an error occurs after calling the helper. The helpers manage the read request, calling back into the network filesystem through the suppplied table of operations. Waits will be performed as @@ -253,7 +253,7 @@ through which it can issue requests and negotiate:: void (*issue_op)(struct netfs_read_subrequest *subreq); bool (*is_still_valid)(struct netfs_read_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, - struct page *page, void **_fsdata); + struct folio *folio, void **_fsdata); void (*done)(struct netfs_read_request *rreq); void (*cleanup)(struct address_space *mapping, void *netfs_priv); }; @@ -313,13 +313,14 @@ The operations are as follows: There is no return value; the netfs_subreq_terminated() function should be called to indicate whether or not the operation succeeded and how much data - it transferred. The filesystem also should not deal with setting pages + it transferred. The filesystem also should not deal with setting folios uptodate, unlocking them or dropping their refs - the helpers need to deal with this as they have to coordinate with copying to the local cache. - Note that the helpers have the pages locked, but not pinned. It is possible - to use the ITER_XARRAY iov iterator to refer to the range of the inode that - is being operated upon without the need to allocate large bvec tables. + Note that the helpers have the folios locked, but not pinned. It is + possible to use the ITER_XARRAY iov iterator to refer to the range of the + inode that is being operated upon without the need to allocate large bvec + tables. * ``is_still_valid()`` @@ -330,15 +331,15 @@ The operations are as follows: * ``check_write_begin()`` [Optional] This is called from the netfs_write_begin() helper once it has - allocated/grabbed the page to be modified to allow the filesystem to flush + allocated/grabbed the folio to be modified to allow the filesystem to flush conflicting state before allowing it to be modified. - It should return 0 if everything is now fine, -EAGAIN if the page should be + It should return 0 if everything is now fine, -EAGAIN if the folio should be regrabbed and any other error code to abort the operation. * ``done`` - [Optional] This is called after the pages in the request have all been + [Optional] This is called after the folios in the request have all been unlocked (and marked uptodate if applicable). * ``cleanup`` @@ -390,7 +391,7 @@ The read helpers work by the following general procedure: * If NETFS_SREQ_CLEAR_TAIL was set, a short read will be cleared to the end of the slice instead of reissuing. - * Once the data is read, the pages that have been fully read/cleared: + * Once the data is read, the folios that have been fully read/cleared: * Will be marked uptodate. @@ -398,11 +399,11 @@ The read helpers work by the following general procedure: * Unlocked - * Any pages that need writing to the cache will then have DIO writes issued. + * Any folios that need writing to the cache will then have DIO writes issued. * Synchronous operations will wait for reading to be complete. - * Writes to the cache will proceed asynchronously and the pages will have the + * Writes to the cache will proceed asynchronously and the folios will have the PG_fscache mark removed when that completes. * The request structures will be cleaned up when everything has completed. @@ -452,6 +453,9 @@ operation table looks like the following:: netfs_io_terminated_t term_func, void *term_func_priv); + int (*prepare_write)(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size); + int (*write)(struct netfs_cache_resources *cres, loff_t start_pos, struct iov_iter *iter, @@ -509,6 +513,14 @@ The methods defined in the table are: indicating whether the termination is definitely happening in the caller's context. + * ``prepare_write()`` + + [Required] Called to adjust a write to the cache and check that there is + sufficient space in the cache. The start and length values indicate the + size of the write that netfslib is proposing, and this can be adjusted by + the cache to respect DIO boundaries. The file size is passed for + information. + * ``write()`` [Required] Called to write to the cache. The start file offset is given @@ -525,4 +537,9 @@ not the read request structure as they could be used in other situations where there isn't a read request structure as well, such as writing dirty data to the cache. + +API Function Reference +====================== + .. kernel-doc:: include/linux/netfs.h +.. kernel-doc:: fs/netfs/read_helper.c diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index 9e07e6bbe6a3..00d8e17d0aca 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -36,6 +36,8 @@ Key to symbols =============== ============================================================= S Start condition +Sr Repeated start condition, used to switch from write to + read mode. P Stop condition Rd/Wr (1 bit) Read/Write bit. Rd equals 1, Wr equals 0. A, NA (1 bit) Acknowledge (ACK) and Not Acknowledge (NACK) bit @@ -100,7 +102,7 @@ Implemented by i2c_smbus_read_byte_data() This reads a single byte from a device, from a designated register. The register is specified through the Comm byte:: - S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] NA P + S Addr Wr [A] Comm [A] Sr Addr Rd [A] [Data] NA P Functionality flag: I2C_FUNC_SMBUS_READ_BYTE_DATA @@ -114,7 +116,7 @@ This operation is very like Read Byte; again, data is read from a device, from a designated register that is specified through the Comm byte. But this time, the data is a complete word (16 bits):: - S Addr Wr [A] Comm [A] S Addr Rd [A] [DataLow] A [DataHigh] NA P + S Addr Wr [A] Comm [A] Sr Addr Rd [A] [DataLow] A [DataHigh] NA P Functionality flag: I2C_FUNC_SMBUS_READ_WORD_DATA @@ -164,7 +166,7 @@ This command selects a device register (through the Comm byte), sends 16 bits of data to it, and reads 16 bits of data in return:: S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A] - S Addr Rd [A] [DataLow] A [DataHigh] NA P + Sr Addr Rd [A] [DataLow] A [DataHigh] NA P Functionality flag: I2C_FUNC_SMBUS_PROC_CALL @@ -181,7 +183,7 @@ of data is specified by the device in the Count byte. :: S Addr Wr [A] Comm [A] - S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P + Sr Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P Functionality flag: I2C_FUNC_SMBUS_READ_BLOCK_DATA @@ -212,7 +214,7 @@ This command selects a device register (through the Comm byte), sends 1 to 31 bytes of data to it, and reads 1 to 31 bytes of data in return:: S Addr Wr [A] Comm [A] Count [A] Data [A] ... - S Addr Rd [A] [Count] A [Data] ... A P + Sr Addr Rd [A] [Count] A [Data] ... A P Functionality flag: I2C_FUNC_SMBUS_BLOCK_PROC_CALL @@ -300,7 +302,7 @@ This command reads a block of bytes from a device, from a designated register that is specified through the Comm byte:: S Addr Wr [A] Comm [A] - S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P + Sr Addr Rd [A] [Data] A [Data] A ... A [Data] NA P Functionality flag: I2C_FUNC_SMBUS_READ_I2C_BLOCK diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst index ddada4a53749..4fd7b70fcde1 100644 --- a/Documentation/locking/locktypes.rst +++ b/Documentation/locking/locktypes.rst @@ -439,11 +439,9 @@ preemption. The following substitution works on both kernels:: spin_lock(&p->lock); p->count += this_cpu_read(var2); -On a non-PREEMPT_RT kernel migrate_disable() maps to preempt_disable() -which makes the above code fully equivalent. On a PREEMPT_RT kernel migrate_disable() ensures that the task is pinned on the current CPU which in turn guarantees that the per-CPU access to var1 and var2 are staying on -the same CPU. +the same CPU while the task remains preemptible. The migrate_disable() substitution is not valid for the following scenario:: @@ -456,9 +454,8 @@ scenario:: p = this_cpu_ptr(&var1); p->val = func2(); -While correct on a non-PREEMPT_RT kernel, this breaks on PREEMPT_RT because -here migrate_disable() does not protect against reentrancy from a -preempting task. A correct substitution for this case is:: +This breaks because migrate_disable() does not protect against reentrancy from +a preempting task. A correct substitution for this case is:: func() { diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst index 95ef56d62077..387fda80f05f 100644 --- a/Documentation/networking/ipvs-sysctl.rst +++ b/Documentation/networking/ipvs-sysctl.rst @@ -37,8 +37,7 @@ conn_reuse_mode - INTEGER 0: disable any special handling on port reuse. The new connection will be delivered to the same real server that was - servicing the previous connection. This will effectively - disable expire_nodest_conn. + servicing the previous connection. bit 1: enable rescheduling of new connections when it is safe. That is, whenever expire_nodest_conn and for TCP sockets, when diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index a722eb30e014..80b13353254a 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -486,8 +486,8 @@ of packets. Drivers are free to use a more permissive configuration than the requested configuration. It is expected that drivers should only implement directly the most generic mode that can be supported. For example if the hardware can -support HWTSTAMP_FILTER_V2_EVENT, then it should generally always upscale -HWTSTAMP_FILTER_V2_L2_SYNC_MESSAGE, and so forth, as HWTSTAMP_FILTER_V2_EVENT +support HWTSTAMP_FILTER_PTP_V2_EVENT, then it should generally always upscale +HWTSTAMP_FILTER_PTP_V2_L2_SYNC, and so forth, as HWTSTAMP_FILTER_PTP_V2_EVENT is more generic (and more useful to applications). A driver which supports hardware time stamping shall update the struct diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst index 8a2788afe89b..5ac62a7b4b7c 100644 --- a/Documentation/power/energy-model.rst +++ b/Documentation/power/energy-model.rst @@ -84,6 +84,16 @@ CONFIG_ENERGY_MODEL must be enabled to use the EM framework. 2.2 Registration of performance domains ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Registration of 'advanced' EM +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The 'advanced' EM gets it's name due to the fact that the driver is allowed +to provide more precised power model. It's not limited to some implemented math +formula in the framework (like it's in 'simple' EM case). It can better reflect +the real power measurements performed for each performance state. Thus, this +registration method should be preferred in case considering EM static power +(leakage) is important. + Drivers are expected to register performance domains into the EM framework by calling the following API:: @@ -103,6 +113,18 @@ to: return warning/error, stop working or panic. See Section 3. for an example of driver implementing this callback, or Section 2.4 for further documentation on this API +Registration of 'simple' EM +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The 'simple' EM is registered using the framework helper function +cpufreq_register_em_with_opp(). It implements a power model which is tight to +math formula:: + + Power = C * V^2 * f + +The EM which is registered using this method might not reflect correctly the +physics of a real device, e.g. when static power (leakage) is important. + 2.3 Accessing performance domains ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -138,6 +160,10 @@ or in Section 2.4 3. Example driver ----------------- +The CPUFreq framework supports dedicated callback for registering +the EM for a given CPU(s) 'policy' object: cpufreq_driver::register_em(). +That callback has to be implemented properly for a given driver, +because the framework would call it at the right time during setup. This section provides a simple example of a CPUFreq driver registering a performance domain in the Energy Model framework using the (fake) 'foo' protocol. The driver implements an est_power() function to be provided to the @@ -167,25 +193,22 @@ EM framework:: 20 return 0; 21 } 22 - 23 static int foo_cpufreq_init(struct cpufreq_policy *policy) + 23 static void foo_cpufreq_register_em(struct cpufreq_policy *policy) 24 { 25 struct em_data_callback em_cb = EM_DATA_CB(est_power); 26 struct device *cpu_dev; - 27 int nr_opp, ret; + 27 int nr_opp; 28 29 cpu_dev = get_cpu_device(cpumask_first(policy->cpus)); 30 - 31 /* Do the actual CPUFreq init work ... */ - 32 ret = do_foo_cpufreq_init(policy); - 33 if (ret) - 34 return ret; - 35 - 36 /* Find the number of OPPs for this policy */ - 37 nr_opp = foo_get_nr_opp(policy); + 31 /* Find the number of OPPs for this policy */ + 32 nr_opp = foo_get_nr_opp(policy); + 33 + 34 /* And register the new performance domain */ + 35 em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus, + 36 true); + 37 } 38 - 39 /* And register the new performance domain */ - 40 em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus, - 41 true); - 42 - 43 return 0; - 44 } + 39 static struct cpufreq_driver foo_cpufreq_driver = { + 40 .register_em = foo_cpufreq_register_em, + 41 }; diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index e35ab74a0f80..cf908d79666e 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -35,6 +35,7 @@ GNU make 3.81 make --version binutils 2.23 ld -v flex 2.5.35 flex --version bison 2.0 bison --version +pahole 1.16 pahole --version util-linux 2.10o fdformat --version kmod 13 depmod -V e2fsprogs 1.41.4 e2fsck -V @@ -54,7 +55,7 @@ mcelog 0.6 mcelog --version iptables 1.4.2 iptables -V openssl & libcrypto 1.0.0 openssl version bc 1.06.95 bc --version -Sphinx\ [#f1]_ 1.3 sphinx-build --version +Sphinx\ [#f1]_ 1.7 sphinx-build --version ====================== =============== ======================================== .. [#f1] Sphinx is needed only to build the Kernel documentation @@ -108,6 +109,16 @@ Bison Since Linux 4.16, the build system generates parsers during build. This requires bison 2.0 or later. +pahole: +------- + +Since Linux 5.2, if CONFIG_DEBUG_INFO_BTF is selected, the build system +generates BTF (BPF Type Format) from DWARF in vmlinux, a bit later from kernel +modules as well. This requires pahole v1.16 or later. + +It is found in the 'dwarves' or 'pahole' distro packages or from +https://fedorapeople.org/~acme/dwarves/. + Perl ---- diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index a0cc96923ea7..6b3aaed66fba 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -14,7 +14,8 @@ works, see Documentation/process/development-process.rst. Also, read Documentation/process/submit-checklist.rst for a list of items to check before submitting code. If you are submitting a driver, also read Documentation/process/submitting-drivers.rst; for device -tree binding patches, read Documentation/process/submitting-patches.rst. +tree binding patches, read +Documentation/devicetree/bindings/submitting-patches.rst. This documentation assumes that you're using ``git`` to prepare your patches. If you're unfamiliar with ``git``, you would be well-advised to learn how to @@ -22,8 +23,8 @@ use it, it will make your life as a kernel developer and in general much easier. Some subsystems and maintainer trees have additional information about -their workflow and expectations, see :ref:`Documentation/process/maintainer -handbooks <maintainer_handbooks_main>`. +their workflow and expectations, see +:ref:`Documentation/process/maintainer-handbooks.rst <maintainer_handbooks_main>`. Obtain a current source tree ---------------------------- diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index 4e5b26f03d5b..b3166c4a7867 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -2442,11 +2442,10 @@ Or this simple script! #!/bin/bash tracefs=`sed -ne 's/^tracefs \(.*\) tracefs.*/\1/p' /proc/mounts` - echo nop > $tracefs/tracing/current_tracer - echo 0 > $tracefs/tracing/tracing_on - echo $$ > $tracefs/tracing/set_ftrace_pid - echo function > $tracefs/tracing/current_tracer - echo 1 > $tracefs/tracing/tracing_on + echo 0 > $tracefs/tracing_on + echo $$ > $tracefs/set_ftrace_pid + echo function > $tracefs/current_tracer + echo 1 > $tracefs/tracing_on exec "$@" diff --git a/Documentation/translations/it_IT/doc-guide/sphinx.rst b/Documentation/translations/it_IT/doc-guide/sphinx.rst index 0046d75d9a70..9762452c584c 100644 --- a/Documentation/translations/it_IT/doc-guide/sphinx.rst +++ b/Documentation/translations/it_IT/doc-guide/sphinx.rst @@ -35,7 +35,7 @@ Installazione Sphinx ==================== I marcatori ReST utilizzati nei file in Documentation/ sono pensati per essere -processati da ``Sphinx`` nella versione 1.3 o superiore. +processati da ``Sphinx`` nella versione 1.7 o superiore. Esiste uno script che verifica i requisiti Sphinx. Per ulteriori dettagli consultate :ref:`it_sphinx-pre-install`. @@ -53,11 +53,6 @@ pacchettizzato dalla vostra distribuzione. .. note:: - #) Le versioni di Sphinx inferiori alla 1.5 non funzionano bene - con il pacchetto Python docutils versione 0.13.1 o superiore. - Se volete usare queste versioni, allora dovere eseguire - ``pip install 'docutils==0.12'``. - #) Viene raccomandato l'uso del tema RTD per la documentazione in HTML. A seconda della versione di Sphinx, potrebbe essere necessaria l'installazione tramite il comando ``pip install sphinx_rtd_theme``. @@ -67,13 +62,13 @@ pacchettizzato dalla vostra distribuzione. utilizzando LaTeX. Per una corretta interpretazione, è necessario aver installato texlive con i pacchetti amdfonts e amsmath. -Riassumendo, se volete installare la versione 1.7.9 di Sphinx dovete eseguire:: +Riassumendo, se volete installare la versione 2.4.4 di Sphinx dovete eseguire:: - $ virtualenv sphinx_1.7.9 - $ . sphinx_1.7.9/bin/activate - (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt + $ virtualenv sphinx_2.4.4 + $ . sphinx_2.4.4/bin/activate + (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt -Dopo aver eseguito ``. sphinx_1.7.9/bin/activate``, il prompt cambierà per +Dopo aver eseguito ``. sphinx_2.4.4/bin/activate``, il prompt cambierà per indicare che state usando il nuovo ambiente. Se aprite un nuova sessione, prima di generare la documentazione, dovrete rieseguire questo comando per rientrare nell'ambiente virtuale. @@ -94,7 +89,7 @@ Generazione in PDF e LaTeX -------------------------- Al momento, la generazione di questi documenti è supportata solo dalle -versioni di Sphinx superiori alla 1.4. +versioni di Sphinx superiori alla 2.4. Per la generazione di PDF e LaTeX, avrete bisogno anche del pacchetto ``XeLaTeX`` nella versione 3.14159265 @@ -119,8 +114,8 @@ l'installazione:: You should run: sudo dnf install -y texlive-luatex85 - /usr/bin/virtualenv sphinx_1.7.9 - . sphinx_1.7.9/bin/activate + /usr/bin/virtualenv sphinx_2.4.4 + . sphinx_2.4.4/bin/activate pip install -r Documentation/sphinx/requirements.txt Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468. diff --git a/Documentation/translations/it_IT/process/changes.rst b/Documentation/translations/it_IT/process/changes.rst index 87d081889bfc..dc7193377b7f 100644 --- a/Documentation/translations/it_IT/process/changes.rst +++ b/Documentation/translations/it_IT/process/changes.rst @@ -57,7 +57,7 @@ mcelog 0.6 mcelog --version iptables 1.4.2 iptables -V openssl & libcrypto 1.0.0 openssl version bc 1.06.95 bc --version -Sphinx\ [#f1]_ 1.3 sphinx-build --version +Sphinx\ [#f1]_ 1.7 sphinx-build --version ====================== ================= ======================================== .. [#f1] Sphinx è necessario solo per produrre la documentazione del Kernel diff --git a/Documentation/translations/zh_CN/doc-guide/sphinx.rst b/Documentation/translations/zh_CN/doc-guide/sphinx.rst index 951595c7d599..23eac67fbc30 100644 --- a/Documentation/translations/zh_CN/doc-guide/sphinx.rst +++ b/Documentation/translations/zh_CN/doc-guide/sphinx.rst @@ -26,7 +26,7 @@ reStructuredText文件å¯èƒ½åŒ…å«åŒ…嫿¥è‡ªæºæ–‡ä»¶çš„结构化文档注释æ 安装Sphinx ========== -Documentation/ 下的ReST文件现在使用sphinx1.3或更高版本构建。 +Documentation/ 下的ReST文件现在使用sphinx1.7或更高版本构建。 这有一个脚本å¯ä»¥æ£€æŸ¥Sphinxçš„ä¾èµ–项。更多详细信æ¯è§ :ref:`sphinx-pre-install_zh` 。 @@ -40,22 +40,19 @@ Documentation/ 下的ReST文件现在使用sphinx1.3或更高版本构建。 .. note:: - #) 低于1.5版本的Sphinxæ— æ³•ä¸ŽPythonçš„0.13.1或更高版本docutils一起æ£å¸¸å·¥ä½œã€‚ - 如果您想使用这些版本,那么应该è¿è¡Œ ``pip install 'docutils==0.12'`` 。 - #) html输出建议使用RTDä¸»é¢˜ã€‚æ ¹æ®Sphinx版本的ä¸åŒï¼Œå®ƒåº”该用 ``pip install sphinx_rtd_theme`` å•独安装。 #) 一些ReST页é¢åŒ…嫿•°å¦è¡¨è¾¾å¼ã€‚由于Sphinx的工作方å¼ï¼Œè¿™äº›è¡¨è¾¾å¼æ˜¯ä½¿ç”¨ LaTeX 编写的。它需è¦å®‰è£…amsfontså’Œamsmathå®åŒ…,以便显示。 -总之,如您è¦å®‰è£…Sphinx 1.7.9版本,应执行:: +总之,如您è¦å®‰è£…Sphinx 2.4.4版本,应执行:: - $ virtualenv sphinx_1.7.9 - $ . sphinx_1.7.9/bin/activate - (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt + $ virtualenv sphinx_2.4.4 + $ . sphinx_2.4.4/bin/activate + (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt -在è¿è¡Œ ``. sphinx_1.7.9/bin/activate`` 之åŽï¼Œæç¤ºç¬¦å°†å˜åŒ–,以指示您æ£åœ¨ä½¿ç”¨æ–° +在è¿è¡Œ ``. sphinx_2.4.4/bin/activate`` 之åŽï¼Œæç¤ºç¬¦å°†å˜åŒ–,以指示您æ£åœ¨ä½¿ç”¨æ–° 环境。如果您打开了一个新的shell,那么在构建文档之å‰ï¼Œæ‚¨éœ€è¦é‡æ–°è¿è¡Œæ¤å‘½ä»¤ä»¥å† 次进入虚拟环境ä¸ã€‚ @@ -71,7 +68,7 @@ Documentation/ 下的ReST文件现在使用sphinx1.3或更高版本构建。 PDFå’ŒLaTeX构建 -------------- -ç›®å‰åªæœ‰Sphinx 1.4åŠæ›´é«˜ç‰ˆæœ¬æ‰æ”¯æŒè¿™ç§æž„建。 +ç›®å‰åªæœ‰Sphinx 2.4åŠæ›´é«˜ç‰ˆæœ¬æ‰æ”¯æŒè¿™ç§æž„建。 对于PDFå’ŒLaTeXè¾“å‡ºï¼Œè¿˜éœ€è¦ ``XeLaTeX`` 3.14159265版本。(译注:æ¤ç‰ˆæœ¬å·çœŸå®ž å˜åœ¨ï¼‰ @@ -93,8 +90,8 @@ PDFå’ŒLaTeX构建 You should run: sudo dnf install -y texlive-luatex85 - /usr/bin/virtualenv sphinx_1.7.9 - . sphinx_1.7.9/bin/activate + /usr/bin/virtualenv sphinx_2.4.4 + . sphinx_2.4.4/bin/activate pip install -r Documentation/sphinx/requirements.txt Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468. diff --git a/Documentation/translations/zh_CN/process/management-style.rst b/Documentation/translations/zh_CN/process/management-style.rst index c6a5bb285797..8053ae474328 100644 --- a/Documentation/translations/zh_CN/process/management-style.rst +++ b/Documentation/translations/zh_CN/process/management-style.rst @@ -36,14 +36,14 @@ Linuxå†…æ ¸ç®¡ç†é£Žæ ¼ æ¯ä¸ªäººéƒ½è®¤ä¸ºç®¡ç†è€…åšå†³å®šï¼Œè€Œä¸”决ç–很é‡è¦ã€‚决定越大越痛苦,管ç†è€…就必须越高级。 这很明显,但事实并éžå¦‚æ¤ã€‚ -游æˆçš„åå—æ˜¯ **é¿å…** åšå‡ºå†³å®šã€‚å°¤å…¶æ˜¯ï¼Œå¦‚æžœæœ‰äººå‘Šè¯‰ä½ â€œé€‰æ‹©ï¼ˆa)或(b), +最é‡è¦çš„æ˜¯ **é¿å…** åšå‡ºå†³å®šã€‚å°¤å…¶æ˜¯ï¼Œå¦‚æžœæœ‰äººå‘Šè¯‰ä½ â€œé€‰æ‹©ï¼ˆa)或(b), 我们真的需è¦ä½ æ¥åšå†³å®šâ€ï¼Œä½ 就是陷入麻烦的管ç†è€…ã€‚ä½ ç®¡ç†çš„äººæ¯”ä½ æ›´äº†è§£ç»†èŠ‚ï¼Œ æ‰€ä»¥å¦‚æžœä»–ä»¬æ¥æ‰¾ä½ åšæŠ€æœ¯å†³ç–ï¼Œä½ å®Œè›‹äº†ã€‚ä½ æ˜¾ç„¶æ²¡æœ‰èƒ½åŠ›ä¸ºä»–ä»¬åšè¿™ä¸ªå†³å®šã€‚ ï¼ˆæŽ¨è®ºï¼šå¦‚æžœä½ ç®¡ç†çš„äººä¸æ¯”ä½ æ›´äº†è§£ç»†èŠ‚ï¼Œä½ ä¹Ÿä¼šè¢«æžç ¸ï¼Œå°½ç®¡åŽŸå› å®Œå…¨ä¸åŒã€‚ ä¹Ÿå°±æ˜¯è¯´ï¼Œä½ çš„å·¥ä½œæ˜¯é”™çš„ï¼Œä»–ä»¬åº”è¯¥ç®¡ç†ä½ çš„æ‰æ™ºï¼‰ -所以游æˆçš„åå—æ˜¯ **é¿å…** åšå‡ºå†³å®šï¼Œè‡³å°‘是那些大而痛苦的决定。åšä¸€äº›å°çš„ +所以最é‡è¦çš„æ˜¯ **é¿å…** åšå‡ºå†³å®šï¼Œè‡³å°‘是那些大而痛苦的决定。åšä¸€äº›å°çš„ å’Œéžç»“果性的决定是很好的,并且使您看起æ¥å¥½åƒçŸ¥é“自己在åšä»€ä¹ˆï¼Œæ‰€ä»¥å†…æ ¸ç®¡ç†è€… 需è¦åšçš„æ˜¯å°†é‚£äº›å¤§çš„å’Œç—›è‹¦çš„å†³å®šå˜æˆé‚£äº›æ²¡æœ‰äººçœŸæ£å…³å¿ƒçš„å°äº‹æƒ…。 diff --git a/MAINTAINERS b/MAINTAINERS index 7a2345ce8521..13f9a84a617e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2263,6 +2263,15 @@ L: linux-iio@vger.kernel.org S: Maintained F: drivers/counter/microchip-tcb-capture.c +ARM/MILBEAUT ARCHITECTURE +M: Taichi Sugaya <sugaya.taichi@socionext.com> +M: Takao Orito <orito.takao@socionext.com> +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/boot/dts/milbeaut* +F: arch/arm/mach-milbeaut/ +N: milbeaut + ARM/MIOA701 MACHINE SUPPORT M: Robert Jarzmik <robert.jarzmik@free.fr> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -2729,10 +2738,11 @@ S: Maintained F: drivers/memory/*emif* ARM/TEXAS INSTRUMENT KEYSTONE ARCHITECTURE +M: Nishanth Menon <nm@ti.com> M: Santosh Shilimkar <ssantosh@kernel.org> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git F: arch/arm/boot/dts/keystone-* F: arch/arm/mach-keystone/ @@ -3570,13 +3580,14 @@ L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/b44.* -BROADCOM B53 ETHERNET SWITCH DRIVER +BROADCOM B53/SF2 ETHERNET SWITCH DRIVER M: Florian Fainelli <f.fainelli@gmail.com> L: netdev@vger.kernel.org L: openwrt-devel@lists.openwrt.org (subscribers-only) S: Supported F: Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml F: drivers/net/dsa/b53/* +F: drivers/net/dsa/bcm_sf2* F: include/linux/dsa/brcm.h F: include/linux/platform_data/b53.h @@ -3733,7 +3744,7 @@ F: drivers/scsi/bnx2i/ BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER M: Ariel Elior <aelior@marvell.com> M: Sudarsana Kalluru <skalluru@marvell.com> -M: GR-everest-linux-l2@marvell.com +M: Manish Chopra <manishc@marvell.com> L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/bnx2x/ @@ -9318,7 +9329,6 @@ S: Maintained F: drivers/iio/pressure/dps310.c INFINIBAND SUBSYSTEM -M: Doug Ledford <dledford@redhat.com> M: Jason Gunthorpe <jgg@nvidia.com> L: linux-rdma@vger.kernel.org S: Supported @@ -10269,9 +10279,9 @@ F: lib/Kconfig.kcsan F: scripts/Makefile.kcsan KDUMP -M: Dave Young <dyoung@redhat.com> M: Baoquan He <bhe@redhat.com> R: Vivek Goyal <vgoyal@redhat.com> +R: Dave Young <dyoung@redhat.com> L: kexec@lists.infradead.org S: Maintained W: http://lse.sourceforge.net/kdump/ @@ -10445,7 +10455,7 @@ F: arch/riscv/include/uapi/asm/kvm* F: arch/riscv/kvm/ KERNEL VIRTUAL MACHINE for s390 (KVM/s390) -M: Christian Borntraeger <borntraeger@de.ibm.com> +M: Christian Borntraeger <borntraeger@linux.ibm.com> M: Janosch Frank <frankja@linux.ibm.com> R: David Hildenbrand <david@redhat.com> R: Claudio Imbrenda <imbrenda@linux.ibm.com> @@ -12169,8 +12179,8 @@ F: drivers/net/ethernet/mellanox/mlx5/core/fpga/* F: include/linux/mlx5/mlx5_ifc_fpga.h MELLANOX ETHERNET SWITCH DRIVERS -M: Jiri Pirko <jiri@nvidia.com> M: Ido Schimmel <idosch@nvidia.com> +M: Petr Machata <petrm@nvidia.com> L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com @@ -15593,7 +15603,7 @@ F: drivers/scsi/qedi/ QLOGIC QL4xxx ETHERNET DRIVER M: Ariel Elior <aelior@marvell.com> -M: GR-everest-linux-l2@marvell.com +M: Manish Chopra <manishc@marvell.com> L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/qed/ @@ -15760,6 +15770,15 @@ S: Maintained F: Documentation/devicetree/bindings/net/qcom,ethqos.txt F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +QUALCOMM FASTRPC DRIVER +M: Srinivas Kandagatla <srinivas.kandagatla@linaro.org> +M: Amol Maheshwari <amahesh@qti.qualcomm.com> +L: linux-arm-msm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/misc/qcom,fastrpc.txt +F: drivers/misc/fastrpc.c +F: include/uapi/misc/fastrpc.h + QUALCOMM GENERIC INTERFACE I2C DRIVER M: Akash Asthana <akashast@codeaurora.org> M: Mukesh Savaliya <msavaliy@codeaurora.org> @@ -15968,6 +15987,7 @@ F: arch/mips/generic/board-ranchu.c RANDOM NUMBER DRIVER M: "Theodore Ts'o" <tytso@mit.edu> +M: Jason A. Donenfeld <Jason@zx2c4.com> S: Maintained F: drivers/char/random.c @@ -16490,6 +16510,12 @@ T: git git://linuxtv.org/media_tree.git F: Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml F: drivers/media/platform/sunxi/sun8i-rotate/ +RPMSG TTY DRIVER +M: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com> +L: linux-remoteproc@vger.kernel.org +S: Maintained +F: drivers/tty/rpmsg_tty.c + RTL2830 MEDIA DRIVER M: Antti Palosaari <crope@iki.fi> L: linux-media@vger.kernel.org @@ -16573,7 +16599,7 @@ F: drivers/video/fbdev/savage/ S390 M: Heiko Carstens <hca@linux.ibm.com> M: Vasily Gorbik <gor@linux.ibm.com> -M: Christian Borntraeger <borntraeger@de.ibm.com> +M: Christian Borntraeger <borntraeger@linux.ibm.com> R: Alexander Gordeev <agordeev@linux.ibm.com> L: linux-s390@vger.kernel.org S: Supported @@ -16611,8 +16637,8 @@ W: http://www.ibm.com/developerworks/linux/linux390/ F: drivers/iommu/s390-iommu.c S390 IUCV NETWORK LAYER -M: Julian Wiedmann <jwi@linux.ibm.com> -M: Karsten Graul <kgraul@linux.ibm.com> +M: Alexandra Winter <wintera@linux.ibm.com> +M: Wenjia Zhang <wenjia@linux.ibm.com> L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported @@ -16622,8 +16648,8 @@ F: include/net/iucv/ F: net/iucv/ S390 NETWORK DRIVERS -M: Julian Wiedmann <jwi@linux.ibm.com> -M: Karsten Graul <kgraul@linux.ibm.com> +M: Alexandra Winter <wintera@linux.ibm.com> +M: Wenjia Zhang <wenjia@linux.ibm.com> L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported @@ -18483,6 +18509,7 @@ F: include/uapi/linux/pkt_sched.h F: include/uapi/linux/tc_act/ F: include/uapi/linux/tc_ematch/ F: net/sched/ +F: tools/testing/selftests/tc-testing TC90522 MEDIA DRIVER M: Akihiro Tsukada <tskd08@gmail.com> @@ -19031,11 +19058,12 @@ F: drivers/mmc/host/tifm_sd.c F: include/linux/tifm.h TI KEYSTONE MULTICORE NAVIGATOR DRIVERS +M: Nishanth Menon <nm@ti.com> M: Santosh Shilimkar <ssantosh@kernel.org> L: linux-kernel@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git F: drivers/soc/ti/* TI LM49xxx FAMILY ASoC CODEC DRIVERS @@ -20317,7 +20345,8 @@ F: arch/x86/include/asm/vmware.h F: arch/x86/kernel/cpu/vmware.c VMWARE PVRDMA DRIVER -M: Adit Ranadive <aditr@vmware.com> +M: Bryan Tan <bryantan@vmware.com> +M: Vishnu Dasa <vdasa@vmware.com> M: VMware PV-Drivers <pv-drivers@vmware.com> L: linux-rdma@vger.kernel.org S: Maintained @@ -2,8 +2,8 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc1 -NAME = Trick or Treat +EXTRAVERSION = -rc5 +NAME = Gobble Gobble # *DOCUMENTATION* # To see a list of typical targets execute "make help" @@ -789,7 +789,7 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong KBUILD_CFLAGS += $(stackp-flags-y) KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror -KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH) +KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH:"%"=%) ifdef CONFIG_CC_IS_CLANG KBUILD_CPPFLAGS += -Qunused-arguments @@ -1374,17 +1374,17 @@ endif ifneq ($(dtstree),) -%.dtb: dt_binding_check include/config/kernel.release scripts_dtc - $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ $(dtstree)/$*.dt.yaml +%.dtb: include/config/kernel.release scripts_dtc + $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ -%.dtbo: dt_binding_check include/config/kernel.release scripts_dtc - $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ $(dtstree)/$*.dt.yaml +%.dtbo: include/config/kernel.release scripts_dtc + $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ PHONY += dtbs dtbs_install dtbs_check dtbs: include/config/kernel.release scripts_dtc $(Q)$(MAKE) $(build)=$(dtstree) -ifneq ($(filter dtbs_check %.dtb %.dtbo, $(MAKECMDGOALS)),) +ifneq ($(filter dtbs_check, $(MAKECMDGOALS)),) export CHECK_DTBS=y dtbs: dt_binding_check endif diff --git a/arch/Kconfig b/arch/Kconfig index 26b8ed11639d..d3c4ab249e9c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -991,6 +991,16 @@ config HAVE_ARCH_COMPAT_MMAP_BASES and vice-versa 32-bit applications to call 64-bit mmap(). Required for applications doing different bitness syscalls. +config PAGE_SIZE_LESS_THAN_64KB + def_bool y + depends on !ARM64_64K_PAGES + depends on !IA64_PAGE_SIZE_64KB + depends on !PAGE_SIZE_64KB + depends on !PARISC_PAGE_SIZE_64KB + depends on !PPC_64K_PAGES + depends on !PPC_256K_PAGES + depends on !PAGE_SIZE_256KB + # This allows to use a set of generic functions to determine mmap base # address by giving priority to top-down scheme only if the process # is not in legacy mode (compat task, unlimited stack size or diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index e4a041cd5715..ca5a32228cd6 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -488,3 +488,4 @@ 556 common landlock_restrict_self sys_landlock_restrict_self # 557 reserved for memfd_secret 558 common process_mrelease sys_process_mrelease +559 common futex_waitv sys_futex_waitv diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index e8c2c7469e10..e201b4b1655a 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -36,7 +36,6 @@ void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); -void flush_dcache_folio(struct folio *folio); void dma_cache_wback_inv(phys_addr_t start, unsigned long sz); void dma_cache_inv(phys_addr_t start, unsigned long sz); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2948487346dc..f9aa0860f83e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1462,6 +1462,7 @@ config HIGHMEM bool "High Memory Support" depends on MMU select KMAP_LOCAL + select KMAP_LOCAL_NON_LINEAR_PTE_ARRAY help The address space of ARM processors is only 4 Gigabytes large and it has to accommodate user address space, kernel address diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index 3b60297af7f6..9e01dbca4a01 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -506,11 +506,17 @@ #address-cells = <3>; #interrupt-cells = <1>; #size-cells = <2>; - interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>, + interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "pcie", "msi"; interrupt-map-mask = <0x0 0x0 0x0 0x7>; interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143 + IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gicv2 GIC_SPI 144 + IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gicv2 GIC_SPI 145 + IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gicv2 GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>; msi-controller; msi-parent = <&pcie0>; diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index d4f355015e3c..f69d2af3c1fa 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -242,6 +242,8 @@ gpio-controller; #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; }; pcie0: pcie@12000 { @@ -408,7 +410,7 @@ i2c0: i2c@18009000 { compatible = "brcm,iproc-i2c"; reg = <0x18009000 0x50>; - interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; clock-frequency = <100000>; diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index e68fb879e4f9..5e56288e343b 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -290,7 +290,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr */ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -void flush_dcache_folio(struct folio *folio); #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index fc2608b18a0d..18f01190dcfd 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -33,7 +33,7 @@ extern void __iomem *sdr_ctl_base_addr; u32 socfpga_sdram_self_refresh(u32 sdr_base); extern unsigned int socfpga_sdram_self_refresh_sz; -extern char secondary_trampoline, secondary_trampoline_end; +extern char secondary_trampoline[], secondary_trampoline_end[]; extern unsigned long socfpga_cpu1start_addr; diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c index fbb80b883e5d..201191cf68f3 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -20,14 +20,14 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) { - int trampoline_size = &secondary_trampoline_end - &secondary_trampoline; + int trampoline_size = secondary_trampoline_end - secondary_trampoline; if (socfpga_cpu1start_addr) { /* This will put CPU #1 into reset. */ writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr + SOCFPGA_RSTMGR_MODMPURST); - memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); + memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size); writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff)); @@ -45,12 +45,12 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle) { - int trampoline_size = &secondary_trampoline_end - &secondary_trampoline; + int trampoline_size = secondary_trampoline_end - secondary_trampoline; if (socfpga_cpu1start_addr) { writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr + SOCFPGA_A10_RSTMGR_MODMPURST); - memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); + memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size); writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff)); diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index fc8b2bb06ffe..e22c9433d5e0 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -7,6 +7,7 @@ * Copyright The Asahi Linux Contributors */ +#include <dt-bindings/gpio/gpio.h> #include <dt-bindings/interrupt-controller/apple-aic.h> #include <dt-bindings/interrupt-controller/irq.h> #include <dt-bindings/pinctrl/apple.h> @@ -281,7 +282,7 @@ port00: pci@0,0 { device_type = "pci"; reg = <0x0 0x0 0x0 0x0 0x0>; - reset-gpios = <&pinctrl_ap 152 0>; + reset-gpios = <&pinctrl_ap 152 GPIO_ACTIVE_LOW>; max-link-speed = <2>; #address-cells = <3>; @@ -301,7 +302,7 @@ port01: pci@1,0 { device_type = "pci"; reg = <0x800 0x0 0x0 0x0 0x0>; - reset-gpios = <&pinctrl_ap 153 0>; + reset-gpios = <&pinctrl_ap 153 GPIO_ACTIVE_LOW>; max-link-speed = <2>; #address-cells = <3>; @@ -321,7 +322,7 @@ port02: pci@2,0 { device_type = "pci"; reg = <0x1000 0x0 0x0 0x0 0x0>; - reset-gpios = <&pinctrl_ap 33 0>; + reset-gpios = <&pinctrl_ap 33 GPIO_ACTIVE_LOW>; max-link-speed = <1>; #address-cells = <3>; diff --git a/arch/arm64/boot/dts/exynos/exynosautov9.dtsi b/arch/arm64/boot/dts/exynos/exynosautov9.dtsi index 3e4727344b4a..a960c0bc2dba 100644 --- a/arch/arm64/boot/dts/exynos/exynosautov9.dtsi +++ b/arch/arm64/boot/dts/exynos/exynosautov9.dtsi @@ -296,8 +296,7 @@ pinctrl-0 = <&ufs_rst_n &ufs_refclk_out>; phys = <&ufs_0_phy>; phy-names = "ufs-phy"; - samsung,sysreg = <&syscon_fsys2>; - samsung,ufs-shareability-reg-offset = <0x710>; + samsung,sysreg = <&syscon_fsys2 0x710>; status = "disabled"; }; }; diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 347b0cc68f07..1494cfa8639b 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -12,6 +12,17 @@ #define HAVE_FUNCTION_GRAPH_FP_TEST +/* + * HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a + * "return address pointer" which can be used to uniquely identify a return + * address which has been overwritten. + * + * On arm64 we use the address of the caller's frame record, which remains the + * same for the lifetime of the instrumented function, unlike the return + * address in the LR. + */ +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #else diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index a39fcf318c77..01d47c5886dc 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -91,7 +91,7 @@ #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ -#define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) +#define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS_SHIFT 16 #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) @@ -276,7 +276,7 @@ #define CPTR_EL2_TFP_SHIFT 10 /* Hyp Coprocessor Trap Register */ -#define CPTR_EL2_TCPAC (1 << 31) +#define CPTR_EL2_TCPAC (1U << 31) #define CPTR_EL2_TAM (1 << 30) #define CPTR_EL2_TTA (1 << 20) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 8433a2058eb1..237224484d0f 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -76,7 +76,7 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { - VM_BUG_ON(mm != &init_mm); + VM_BUG_ON(mm && mm != &init_mm); __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN); } diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index a4e046ef4568..6564a01cc085 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -47,9 +47,6 @@ struct stack_info { * @prev_type: The type of stack this frame record was on, or a synthetic * value of STACK_TYPE_UNKNOWN. This is used to detect a * transition from one stack to another. - * - * @graph: When FUNCTION_GRAPH_TRACER is selected, holds the index of a - * replacement lr value in the ftrace graph stack. */ struct stackframe { unsigned long fp; @@ -57,9 +54,6 @@ struct stackframe { DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES); unsigned long prev_fp; enum stack_type prev_type; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - int graph; -#endif #ifdef CONFIG_KRETPROBES struct llist_node *kr_cur; #endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 6e2e0b7031ab..3a5ff5e20586 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -281,12 +281,22 @@ do { \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) +/* + * We must not call into the scheduler between uaccess_ttbr0_enable() and + * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, + * we must evaluate these outside of the critical section. + */ #define __raw_get_user(x, ptr, err) \ do { \ + __typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \ + __typeof__(x) __rgu_val; \ __chk_user_ptr(ptr); \ + \ uaccess_ttbr0_enable(); \ - __raw_get_mem("ldtr", x, ptr, err); \ + __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err); \ uaccess_ttbr0_disable(); \ + \ + (x) = __rgu_val; \ } while (0) #define __get_user_error(x, ptr, err) \ @@ -310,14 +320,22 @@ do { \ #define get_user __get_user +/* + * We must not call into the scheduler between __uaccess_enable_tco_async() and + * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking + * functions, we must evaluate these outside of the critical section. + */ #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ + __typeof__(dst) __gkn_dst = (dst); \ + __typeof__(src) __gkn_src = (src); \ int __gkn_err = 0; \ \ __uaccess_enable_tco_async(); \ - __raw_get_mem("ldr", *((type *)(dst)), \ - (__force type *)(src), __gkn_err); \ + __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ + (__force type *)(__gkn_src), __gkn_err); \ __uaccess_disable_tco_async(); \ + \ if (unlikely(__gkn_err)) \ goto err_label; \ } while (0) @@ -351,11 +369,19 @@ do { \ } \ } while (0) +/* + * We must not call into the scheduler between uaccess_ttbr0_enable() and + * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, + * we must evaluate these outside of the critical section. + */ #define __raw_put_user(x, ptr, err) \ do { \ - __chk_user_ptr(ptr); \ + __typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \ + __typeof__(*(ptr)) __rpu_val = (x); \ + __chk_user_ptr(__rpu_ptr); \ + \ uaccess_ttbr0_enable(); \ - __raw_put_mem("sttr", x, ptr, err); \ + __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err); \ uaccess_ttbr0_disable(); \ } while (0) @@ -380,14 +406,22 @@ do { \ #define put_user __put_user +/* + * We must not call into the scheduler between __uaccess_enable_tco_async() and + * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking + * functions, we must evaluate these outside of the critical section. + */ #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ + __typeof__(dst) __pkn_dst = (dst); \ + __typeof__(src) __pkn_src = (src); \ int __pkn_err = 0; \ \ __uaccess_enable_tco_async(); \ - __raw_put_mem("str", *((type *)(src)), \ - (__force type *)(dst), __pkn_err); \ + __raw_put_mem("str", *((type *)(__pkn_src)), \ + (__force type *)(__pkn_dst), __pkn_err); \ __uaccess_disable_tco_async(); \ + \ if (unlikely(__pkn_err)) \ goto err_label; \ } while(0) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index b3e4f9a088b1..8cf970d219f5 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -77,11 +77,17 @@ .endm SYM_CODE_START(ftrace_regs_caller) +#ifdef BTI_C + BTI_C +#endif ftrace_regs_entry 1 b ftrace_common SYM_CODE_END(ftrace_regs_caller) SYM_CODE_START(ftrace_caller) +#ifdef BTI_C + BTI_C +#endif ftrace_regs_entry 0 b ftrace_common SYM_CODE_END(ftrace_caller) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index fc62dfe73f93..4506c4a90ac1 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -244,8 +244,6 @@ void arch_ftrace_update_code(int command) * on the way back to parent. For this purpose, this function is called * in _mcount() or ftrace_caller() to replace return address (*parent) on * the call stack to return_to_handler. - * - * Note that @frame_pointer is used only for sanity check later. */ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, unsigned long frame_pointer) @@ -263,8 +261,10 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, */ old = *parent; - if (!function_graph_enter(old, self_addr, frame_pointer, NULL)) + if (!function_graph_enter(old, self_addr, frame_pointer, + (void *)frame_pointer)) { *parent = return_hooker; + } } #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 1038494135c8..6fb31c117ebe 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -147,7 +147,7 @@ int machine_kexec_post_load(struct kimage *kimage) if (rc) return rc; kimage->arch.ttbr1 = __pa(trans_pgd); - kimage->arch.zero_page = __pa(empty_zero_page); + kimage->arch.zero_page = __pa_symbol(empty_zero_page); reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start; memcpy(reloc_code, __relocate_new_kernel_start, reloc_size); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index c30624fff6ac..94f83cd44e50 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -38,9 +38,6 @@ void start_backtrace(struct stackframe *frame, unsigned long fp, { frame->fp = fp; frame->pc = pc; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame->graph = 0; -#endif #ifdef CONFIG_KRETPROBES frame->kr_cur = NULL; #endif @@ -116,20 +113,23 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->prev_fp = fp; frame->prev_type = info.type; + frame->pc = ptrauth_strip_insn_pac(frame->pc); + #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk->ret_stack && - (ptrauth_strip_insn_pac(frame->pc) == (unsigned long)return_to_handler)) { - struct ftrace_ret_stack *ret_stack; + (frame->pc == (unsigned long)return_to_handler)) { + unsigned long orig_pc; /* * This is a case where function graph tracer has * modified a return address (LR) in a stack frame * to hook a function return. * So replace it to an original value. */ - ret_stack = ftrace_graph_get_ret_stack(tsk, frame->graph++); - if (WARN_ON_ONCE(!ret_stack)) + orig_pc = ftrace_graph_ret_addr(tsk, NULL, frame->pc, + (void *)frame->fp); + if (WARN_ON_ONCE(frame->pc == orig_pc)) return -EINVAL; - frame->pc = ret_stack->ret; + frame->pc = orig_pc; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #ifdef CONFIG_KRETPROBES @@ -137,8 +137,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->pc = kretprobe_find_ret_addr(tsk, (void *)frame->fp, &frame->kr_cur); #endif - frame->pc = ptrauth_strip_insn_pac(frame->pc); - return 0; } NOKPROBE_SYMBOL(unwind_frame); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 2f03cbfefe67..e4727dc771bf 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -223,7 +223,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_NR_VCPUS: - r = num_online_cpus(); + /* + * ARM64 treats KVM_CAP_NR_CPUS differently from all other + * architectures, as it does not always bound it to + * KVM_CAP_MAX_VCPUS. It should not matter much because + * this is just an advisory value. + */ + r = min_t(unsigned int, num_online_cpus(), + kvm_arm_default_max_vcpus()); break; case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 7a0af1d39303..96c5f3fb7838 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -403,6 +403,8 @@ typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu); +static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code); + /* * Allow the hypervisor to handle the exit with an exit handler if it has one. * @@ -429,6 +431,18 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) */ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { + /* + * Save PSTATE early so that we can evaluate the vcpu mode + * early on. + */ + vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR); + + /* + * Check whether we want to repaint the state one way or + * another. + */ + early_exit_filter(vcpu, exit_code); + if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index de7e14c862e6..7ecca8b07851 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -70,7 +70,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) { ctxt->regs.pc = read_sysreg_el2(SYS_ELR); - ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); + /* + * Guest PSTATE gets saved at guest fixup time in all + * cases. We still need to handle the nVHE host side here. + */ + if (!has_vhe() && ctxt->__hyp_running_vcpu) + ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c0e3fed26d93..d13115a12434 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -233,7 +233,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) * Returns false if the guest ran in AArch32 when it shouldn't have, and * thus should exit to the host, or true if a the guest run loop can continue. */ -static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code) +static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) { struct kvm *kvm = kern_hyp_va(vcpu->kvm); @@ -248,10 +248,7 @@ static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code) vcpu->arch.target = -1; *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT); *exit_code |= ARM_EXCEPTION_IL; - return false; } - - return true; } /* Switch to the guest for legacy non-VHE systems */ @@ -316,9 +313,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) /* Jump in the fire! */ exit_code = __guest_enter(vcpu); - if (unlikely(!handle_aarch32_guest(vcpu, &exit_code))) - break; - /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 5a2cb5d9bc4b..fbb26b93c347 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -112,6 +112,10 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) return hyp_exit_handlers; } +static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) +{ +} + /* Switch to the guest for VHE systems running in EL2 */ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c index e5fbf8653a21..2020af88b636 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c @@ -209,7 +209,7 @@ asmlinkage void do_trap_illinsn(struct pt_regs *regs) asmlinkage void do_trap_fpe(struct pt_regs *regs) { -#ifdef CONFIG_CPU_HAS_FP +#ifdef CONFIG_CPU_HAS_FPU return fpu_fpe(regs); #else do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->pc, @@ -219,7 +219,7 @@ asmlinkage void do_trap_fpe(struct pt_regs *regs) asmlinkage void do_trap_priv(struct pt_regs *regs) { -#ifdef CONFIG_CPU_HAS_FP +#ifdef CONFIG_CPU_HAS_FPU if (user_mode(regs) && fpu_libc_helper(regs)) return; #endif diff --git a/arch/hexagon/include/asm/timer-regs.h b/arch/hexagon/include/asm/timer-regs.h deleted file mode 100644 index ee6c61423a05..000000000000 --- a/arch/hexagon/include/asm/timer-regs.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Timer support for Hexagon - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -#ifndef _ASM_TIMER_REGS_H -#define _ASM_TIMER_REGS_H - -/* This stuff should go into a platform specific file */ -#define TCX0_CLK_RATE 19200 -#define TIMER_ENABLE 0 -#define TIMER_CLR_ON_MATCH 1 - -/* - * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until - * release 1.1, and then it's "adjustable" and probably not defaulted. - */ -#define RTOS_TIMER_INT 3 -#ifdef CONFIG_HEXAGON_COMET -#define RTOS_TIMER_REGS_ADDR 0xAB000000UL -#endif -#define SLEEP_CLK_RATE 32000 - -#endif diff --git a/arch/hexagon/include/asm/timex.h b/arch/hexagon/include/asm/timex.h index 8d4ec76fceb4..dfe69e118b2b 100644 --- a/arch/hexagon/include/asm/timex.h +++ b/arch/hexagon/include/asm/timex.h @@ -7,11 +7,10 @@ #define _ASM_TIMEX_H #include <asm-generic/timex.h> -#include <asm/timer-regs.h> #include <asm/hexagon_vm.h> /* Using TCX0 as our clock. CLOCK_TICK_RATE scheduled to be removed. */ -#define CLOCK_TICK_RATE TCX0_CLK_RATE +#define CLOCK_TICK_RATE 19200 #define ARCH_HAS_READ_CURRENT_TIMER diff --git a/arch/hexagon/kernel/.gitignore b/arch/hexagon/kernel/.gitignore new file mode 100644 index 000000000000..c5f676c3c224 --- /dev/null +++ b/arch/hexagon/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c index feffe527ac92..febc95714d75 100644 --- a/arch/hexagon/kernel/time.c +++ b/arch/hexagon/kernel/time.c @@ -17,9 +17,10 @@ #include <linux/of_irq.h> #include <linux/module.h> -#include <asm/timer-regs.h> #include <asm/hexagon_vm.h> +#define TIMER_ENABLE BIT(0) + /* * For the clocksource we need: * pcycle frequency (600MHz) @@ -33,6 +34,13 @@ cycles_t pcycle_freq_mhz; cycles_t thread_freq_mhz; cycles_t sleep_clk_freq; +/* + * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until + * release 1.1, and then it's "adjustable" and probably not defaulted. + */ +#define RTOS_TIMER_INT 3 +#define RTOS_TIMER_REGS_ADDR 0xAB000000UL + static struct resource rtos_timer_resources[] = { { .start = RTOS_TIMER_REGS_ADDR, @@ -80,7 +88,7 @@ static int set_next_event(unsigned long delta, struct clock_event_device *evt) iowrite32(0, &rtos_timer->clear); iowrite32(delta, &rtos_timer->match); - iowrite32(1 << TIMER_ENABLE, &rtos_timer->enable); + iowrite32(TIMER_ENABLE, &rtos_timer->enable); return 0; } diff --git a/arch/hexagon/lib/io.c b/arch/hexagon/lib/io.c index d35d69d6588c..55f75392857b 100644 --- a/arch/hexagon/lib/io.c +++ b/arch/hexagon/lib/io.c @@ -27,6 +27,7 @@ void __raw_readsw(const void __iomem *addr, void *data, int len) *dst++ = *src; } +EXPORT_SYMBOL(__raw_readsw); /* * __raw_writesw - read words a short at a time @@ -47,6 +48,7 @@ void __raw_writesw(void __iomem *addr, const void *data, int len) } +EXPORT_SYMBOL(__raw_writesw); /* Pretty sure len is pre-adjusted for the length of the access already */ void __raw_readsl(const void __iomem *addr, void *data, int len) @@ -62,6 +64,7 @@ void __raw_readsl(const void __iomem *addr, void *data, int len) } +EXPORT_SYMBOL(__raw_readsl); void __raw_writesl(void __iomem *addr, const void *data, int len) { @@ -76,3 +79,4 @@ void __raw_writesl(void __iomem *addr, const void *data, int len) } +EXPORT_SYMBOL(__raw_writesl); diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 6fea1844fb95..707ae121f6d3 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -369,3 +369,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h index 8ab46625ddd3..1ac55e7b47f0 100644 --- a/arch/m68k/include/asm/cacheflush_mm.h +++ b/arch/m68k/include/asm/cacheflush_mm.h @@ -250,7 +250,6 @@ static inline void __flush_page_to_ram(void *vaddr) #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define flush_dcache_page(page) __flush_page_to_ram(page_address(page)) -void flush_dcache_folio(struct folio *folio); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) #define flush_icache_page(vma, page) __flush_page_to_ram(page_address(page)) diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 7976dff8f879..45bc32a41b90 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -448,3 +448,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 99058a6da956..34d6458340b0 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -1145,7 +1145,7 @@ asmlinkage void set_esp0(unsigned long ssp) */ asmlinkage void fpsp040_die(void) { - force_fatal_sig(SIGSEGV); + force_exit_sig(SIGSEGV); } #ifdef CONFIG_M68KFPU_EMU diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 6b0e11362bd2..2204bde3ce4a 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -454,3 +454,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index de60ad190057..0215dc1529e9 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -3097,7 +3097,7 @@ config STACKTRACE_SUPPORT config PGTABLE_LEVELS int default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48 - default 3 if 64BIT && !PAGE_SIZE_64KB + default 3 if 64BIT && (!PAGE_SIZE_64KB || MIPS_VA_BITS_48) default 2 config MIPS_AUTO_PFN_OFFSET diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c index 5a3e325275d0..1c91064cb448 100644 --- a/arch/mips/bcm63xx/clk.c +++ b/arch/mips/bcm63xx/clk.c @@ -381,6 +381,12 @@ void clk_disable(struct clk *clk) EXPORT_SYMBOL(clk_disable); +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + unsigned long clk_get_rate(struct clk *clk) { if (!clk) diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 2861a05c2e0c..f27cf31b4140 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -52,7 +52,7 @@ endif vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o -vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o +vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o $(obj)/ashldi3.o targets := $(notdir $(vmlinuzobjs-y)) diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c index a3aa22c77cad..a07a5edbcda7 100644 --- a/arch/mips/generic/yamon-dt.c +++ b/arch/mips/generic/yamon-dt.c @@ -75,7 +75,7 @@ static unsigned int __init gen_fdt_mem_array( __init int yamon_dt_append_memory(void *fdt, const struct yamon_mem_region *regions) { - unsigned long phys_memsize, memsize; + unsigned long phys_memsize = 0, memsize; __be32 mem_array[2 * MAX_MEM_ARRAY_ENTRIES]; unsigned int mem_entries; int i, err, mem_off; diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h index f207388541d5..b3dc9c589442 100644 --- a/arch/mips/include/asm/cacheflush.h +++ b/arch/mips/include/asm/cacheflush.h @@ -61,8 +61,6 @@ static inline void flush_dcache_page(struct page *page) SetPageDcacheDirty(page); } -void flush_dcache_folio(struct folio *folio); - #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index ac0e2cfc6d57..24a529c6c4be 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1734,8 +1734,6 @@ static inline void decode_cpucfg(struct cpuinfo_mips *c) static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { - decode_configs(c); - /* All Loongson processors covered here define ExcCode 16 as GSExc. */ c->options |= MIPS_CPU_GSEXCEX; @@ -1796,6 +1794,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) panic("Unknown Loongson Processor ID!"); break; } + + decode_configs(c); } #else static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { } diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index 376a6e2676e9..9f47a889b047 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -185,7 +185,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_puts(m, " tx39_cache"); if (cpu_has_octeon_cache) seq_puts(m, " octeon_cache"); - if (cpu_has_fpu) + if (raw_cpu_has_fpu) seq_puts(m, " fpu"); if (cpu_has_32fpr) seq_puts(m, " 32fpr"); diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 70e32de2bcaa..72d02d363f36 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -387,3 +387,4 @@ 446 n32 landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 n32 process_mrelease sys_process_mrelease +449 n32 futex_waitv sys_futex_waitv diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 1ca7bc337932..e2c481fcede6 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -363,3 +363,4 @@ 446 n64 landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 n64 process_mrelease sys_process_mrelease +449 n64 futex_waitv sys_futex_waitv diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index a61c35edaa74..3714c97b2643 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -436,3 +436,4 @@ 446 o32 landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 o32 process_mrelease sys_process_mrelease +449 o32 futex_waitv sys_futex_waitv diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 562aa878b266..aa20d074d388 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1067,7 +1067,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_NR_VCPUS: - r = num_online_cpus(); + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c index dd819e31fcbb..4916cccf378f 100644 --- a/arch/mips/lantiq/clk.c +++ b/arch/mips/lantiq/clk.c @@ -158,6 +158,12 @@ void clk_deactivate(struct clk *clk) } EXPORT_SYMBOL(clk_deactivate); +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + static inline u32 get_counter_resolution(void) { u32 res; diff --git a/arch/mips/net/bpf_jit_comp.h b/arch/mips/net/bpf_jit_comp.h index 6f3a7b07294b..a37fe20818eb 100644 --- a/arch/mips/net/bpf_jit_comp.h +++ b/arch/mips/net/bpf_jit_comp.h @@ -98,7 +98,7 @@ do { \ #define emit(...) __emit(__VA_ARGS__) /* Workaround for R10000 ll/sc errata */ -#ifdef CONFIG_WAR_R10000 +#ifdef CONFIG_WAR_R10000_LLSC #define LLSC_beqz beqzl #else #define LLSC_beqz beqz diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 3fc0bb7d6487..c2a222ebfa2a 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -27,7 +27,6 @@ void flush_cache_vunmap(unsigned long start, unsigned long end); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); -void flush_dcache_folio(struct folio *folio); void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, void *src, int len); void copy_from_user_page(struct vm_area_struct *vma, struct page *page, diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h index 1999561b22aa..d0b71dd71287 100644 --- a/arch/nios2/include/asm/cacheflush.h +++ b/arch/nios2/include/asm/cacheflush.h @@ -29,7 +29,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); -void flush_dcache_folio(struct folio *folio); extern void flush_icache_range(unsigned long start, unsigned long end); extern void flush_icache_page(struct vm_area_struct *vma, struct page *page); diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 8db4af4879d0..82d77f4b0d08 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -15,7 +15,12 @@ # Mike Shaver, Helge Deller and Martin K. Petersen # +ifdef CONFIG_PARISC_SELF_EXTRACT +boot := arch/parisc/boot +KBUILD_IMAGE := $(boot)/bzImage +else KBUILD_IMAGE := vmlinuz +endif NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index d6fd8fa7ed8c..53061cb2cf7f 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -231,6 +231,7 @@ CONFIG_CRYPTO_DEFLATE=y CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y CONFIG_FONTS=y +CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index d2daeac2b217..1b8fd80cbe7f 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -1,7 +1,9 @@ CONFIG_LOCALVERSION="-64bit" # CONFIG_LOCALVERSION_AUTO is not set +CONFIG_KERNEL_LZ4=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -35,6 +37,7 @@ CONFIG_MODVERSIONS=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set +CONFIG_MEMORY_FAILURE=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -65,12 +68,15 @@ CONFIG_SCSI_ISCSI_ATTRS=y CONFIG_SCSI_SRP_ATTRS=y CONFIG_ISCSI_BOOT_SYSFS=y CONFIG_SCSI_MPT2SAS=y -CONFIG_SCSI_LASI700=m +CONFIG_SCSI_LASI700=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_ZALON=y CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_DH=y CONFIG_ATA=y +CONFIG_SATA_SIL=y +CONFIG_SATA_SIS=y +CONFIG_SATA_VIA=y CONFIG_PATA_NS87415=y CONFIG_PATA_SIL680=y CONFIG_ATA_GENERIC=y @@ -79,6 +85,7 @@ CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m CONFIG_DM_RAID=m CONFIG_DM_UEVENT=y +CONFIG_DM_AUDIT=y CONFIG_FUSION=y CONFIG_FUSION_SPI=y CONFIG_FUSION_SAS=y @@ -196,10 +203,15 @@ CONFIG_FB_MATROX_G=y CONFIG_FB_MATROX_I2C=y CONFIG_FB_MATROX_MAVEN=y CONFIG_FB_RADEON=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_CLUT224 is not set CONFIG_HIDRAW=y CONFIG_HID_PID=y CONFIG_USB_HIDDEV=y CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_UIO=y CONFIG_UIO_PDRV_GENIRQ=m CONFIG_UIO_AEC=m diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 7085df079702..6d13ae236fcb 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -3,38 +3,19 @@ * Copyright (C) 1999 Hewlett-Packard (Frank Rowand) * Copyright (C) 1999 Philipp Rumpf <prumpf@tux.org> * Copyright (C) 1999 SuSE GmbH + * Copyright (C) 2021 Helge Deller <deller@gmx.de> */ #ifndef _PARISC_ASSEMBLY_H #define _PARISC_ASSEMBLY_H -#define CALLEE_FLOAT_FRAME_SIZE 80 - #ifdef CONFIG_64BIT -#define LDREG ldd -#define STREG std -#define LDREGX ldd,s -#define LDREGM ldd,mb -#define STREGM std,ma -#define SHRREG shrd -#define SHLREG shld -#define ANDCM andcm,* -#define COND(x) * ## x #define RP_OFFSET 16 #define FRAME_SIZE 128 #define CALLEE_REG_FRAME_SIZE 144 #define REG_SZ 8 #define ASM_ULONG_INSN .dword #else /* CONFIG_64BIT */ -#define LDREG ldw -#define STREG stw -#define LDREGX ldwx,s -#define LDREGM ldwm -#define STREGM stwm -#define SHRREG shr -#define SHLREG shlw -#define ANDCM andcm -#define COND(x) x #define RP_OFFSET 20 #define FRAME_SIZE 64 #define CALLEE_REG_FRAME_SIZE 128 @@ -45,6 +26,7 @@ /* Frame alignment for 32- and 64-bit */ #define FRAME_ALIGN 64 +#define CALLEE_FLOAT_FRAME_SIZE 80 #define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE) #ifdef CONFIG_PA20 @@ -68,6 +50,28 @@ #ifdef __ASSEMBLY__ #ifdef CONFIG_64BIT +#define LDREG ldd +#define STREG std +#define LDREGX ldd,s +#define LDREGM ldd,mb +#define STREGM std,ma +#define SHRREG shrd +#define SHLREG shld +#define ANDCM andcm,* +#define COND(x) * ## x +#else /* CONFIG_64BIT */ +#define LDREG ldw +#define STREG stw +#define LDREGX ldwx,s +#define LDREGM ldwm +#define STREGM stwm +#define SHRREG shr +#define SHLREG shlw +#define ANDCM andcm +#define COND(x) x +#endif + +#ifdef CONFIG_64BIT /* the 64-bit pa gnu assembler unfortunately defaults to .level 1.1 or 2.0 so * work around that for now... */ .level 2.0w @@ -143,6 +147,17 @@ extrd,u \r, 63-(\sa), 64-(\sa), \t .endm + /* Extract unsigned for 32- and 64-bit + * The extru instruction leaves the most significant 32 bits of the + * target register in an undefined state on PA 2.0 systems. */ + .macro extru_safe r, p, len, t +#ifdef CONFIG_64BIT + extrd,u \r, 32+(\p), \len, \t +#else + extru \r, \p, \len, \t +#endif + .endm + /* load 32-bit 'value' into 'reg' compensating for the ldil * sign-extension when running in wide mode. * WARNING!! neither 'value' nor 'reg' can be expressions diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index da0cd4b3a28f..859b8a34adcf 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -50,7 +50,6 @@ void invalidate_kernel_vmap_range(void *vaddr, int size); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); -void flush_dcache_folio(struct folio *folio); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index 7efb1aa2f7f8..af2a598bc0f8 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -5,6 +5,7 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <linux/stringify.h> #include <asm/assembly.h> #define JUMP_LABEL_NOP_SIZE 4 diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h index 4b9e3d707571..2b3010ade00e 100644 --- a/arch/parisc/include/asm/rt_sigframe.h +++ b/arch/parisc/include/asm/rt_sigframe.h @@ -2,7 +2,7 @@ #ifndef _ASM_PARISC_RT_SIGFRAME_H #define _ASM_PARISC_RT_SIGFRAME_H -#define SIGRETURN_TRAMP 3 +#define SIGRETURN_TRAMP 4 #define SIGRESTARTBLOCK_TRAMP 5 #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP) diff --git a/arch/parisc/install.sh b/arch/parisc/install.sh index 056d588befdd..70d3cffb0251 100644 --- a/arch/parisc/install.sh +++ b/arch/parisc/install.sh @@ -39,6 +39,7 @@ verify "$3" if [ -n "${INSTALLKERNEL}" ]; then if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi + if [ -x /usr/sbin/${INSTALLKERNEL} ]; then exec /usr/sbin/${INSTALLKERNEL} "$@"; fi fi # Default install diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 88c188a965d8..6e9cdb269862 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -366,17 +366,9 @@ */ .macro L2_ptep pmd,pte,index,va,fault #if CONFIG_PGTABLE_LEVELS == 3 - extru \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index + extru_safe \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index #else -# if defined(CONFIG_64BIT) - extrd,u \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index - #else - # if PAGE_SIZE > 4096 - extru \va,31-ASM_PGDIR_SHIFT,32-ASM_PGDIR_SHIFT,\index - # else - extru \va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index - # endif -# endif + extru_safe \va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index #endif dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */ #if CONFIG_PGTABLE_LEVELS < 3 @@ -386,7 +378,7 @@ bb,>=,n \pmd,_PxD_PRESENT_BIT,\fault dep %r0,31,PxD_FLAG_SHIFT,\pmd /* clear flags */ SHLREG \pmd,PxD_VALUE_SHIFT,\pmd - extru \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index + extru_safe \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */ shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */ .endm diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index bbfe23c40c01..46b1050640b8 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -288,21 +288,22 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, already in userspace. The first words of tramp are used to save the previous sigrestartblock trampoline that might be on the stack. We start the sigreturn trampoline at - SIGRESTARTBLOCK_TRAMP. */ + SIGRESTARTBLOCK_TRAMP+X. */ err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0, &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]); - err |= __put_user(INSN_BLE_SR2_R0, + err |= __put_user(INSN_LDI_R20, &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]); - err |= __put_user(INSN_LDI_R20, + err |= __put_user(INSN_BLE_SR2_R0, &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]); + err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]); - start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]; - end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]; + start = (unsigned long) &frame->tramp[0]; + end = (unsigned long) &frame->tramp[TRAMP_SIZE]; flush_user_dcache_range_asm(start, end); flush_user_icache_range_asm(start, end); /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP - * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP + * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP */ rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP]; diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h index a5bdbb5678b7..f166250f2d06 100644 --- a/arch/parisc/kernel/signal32.h +++ b/arch/parisc/kernel/signal32.h @@ -36,7 +36,7 @@ struct compat_regfile { compat_int_t rf_sar; }; -#define COMPAT_SIGRETURN_TRAMP 3 +#define COMPAT_SIGRETURN_TRAMP 4 #define COMPAT_SIGRESTARTBLOCK_TRAMP 5 #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \ COMPAT_SIGRESTARTBLOCK_TRAMP) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 4fb3b6a993bf..d2497b339d13 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -566,7 +566,7 @@ lws_compare_and_swap: ldo R%lws_lock_start(%r20), %r28 /* Extract eight bits from r26 and hash lock (Bits 3-11) */ - extru %r26, 28, 8, %r20 + extru_safe %r26, 28, 8, %r20 /* Find lock to use, the hash is either one of 0 to 15, multiplied by 16 (keep it 16-byte aligned) @@ -751,7 +751,7 @@ cas2_lock_start: ldo R%lws_lock_start(%r20), %r28 /* Extract eight bits from r26 and hash lock (Bits 3-11) */ - extru %r26, 28, 8, %r20 + extru_safe %r26, 28, 8, %r20 /* Find lock to use, the hash is either one of 0 to 15, multiplied by 16 (keep it 16-byte aligned) diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index bf751e0732b7..358c00000755 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -446,3 +446,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 9fb1e794831b..061119a56fbe 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -249,30 +249,16 @@ void __init time_init(void) static int __init init_cr16_clocksource(void) { /* - * The cr16 interval timers are not syncronized across CPUs on - * different sockets, so mark them unstable and lower rating on - * multi-socket SMP systems. + * The cr16 interval timers are not syncronized across CPUs, even if + * they share the same socket. */ if (num_online_cpus() > 1 && !running_on_qemu) { - int cpu; - unsigned long cpu0_loc; - cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; - - for_each_online_cpu(cpu) { - if (cpu == 0) - continue; - if ((cpu0_loc != 0) && - (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) - continue; - - /* mark sched_clock unstable */ - clear_sched_clock_stable(); - - clocksource_cr16.name = "cr16_unstable"; - clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; - clocksource_cr16.rating = 0; - break; - } + /* mark sched_clock unstable */ + clear_sched_clock_stable(); + + clocksource_cr16.name = "cr16_unstable"; + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; } /* register at clocksource framework */ diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 3d208afd15bc..2769eb991f58 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -57,8 +57,6 @@ SECTIONS { . = KERNEL_BINARY_TEXT_START; - _stext = .; /* start of kernel text, includes init code & data */ - __init_begin = .; HEAD_TEXT_SECTION MLONGCALL_DISCARD(INIT_TEXT_SECTION(8)) @@ -82,6 +80,7 @@ SECTIONS /* freed after init ends here */ _text = .; /* Text and read-only data */ + _stext = .; MLONGCALL_KEEP(INIT_TEXT_SECTION(8)) .text ALIGN(PAGE_SIZE) : { TEXT_TEXT diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 0e3640e14eb1..5fa68c2ef1f8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -196,3 +196,6 @@ clean-files := vmlinux.lds # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg $(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg + +# for cleaning +subdir- += vdso32 vdso64 diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 6b1ec9e3541b..349c4a820231 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -202,11 +202,11 @@ vmap_stack_overflow: mfspr r1, SPRN_SPRG_THREAD lwz r1, TASK_CPU - THREAD(r1) slwi r1, r1, 3 - addis r1, r1, emergency_ctx@ha + addis r1, r1, emergency_ctx-PAGE_OFFSET@ha #else - lis r1, emergency_ctx@ha + lis r1, emergency_ctx-PAGE_OFFSET@ha #endif - lwz r1, emergency_ctx@l(r1) + lwz r1, emergency_ctx-PAGE_OFFSET@l(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 0 vmap_stack_overflow prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 2d596881b70e..0d073b9fd52c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -733,6 +733,7 @@ _GLOBAL(mmu_pin_tlb) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) + li r8, 0 #ifdef CONFIG_PIN_TLB_IMMR li r0, 3 #else @@ -741,26 +742,26 @@ _GLOBAL(mmu_pin_tlb) mtctr r0 cmpwi r4, 0 beq 4f - LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) LOAD_REG_ADDR(r9, _sinittext) 2: ori r0, r6, MD_EVALID + ori r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT mtspr SPRN_MD_CTR, r5 mtspr SPRN_MD_EPN, r0 mtspr SPRN_MD_TWC, r7 - mtspr SPRN_MD_RPN, r8 + mtspr SPRN_MD_RPN, r12 addi r5, r5, 0x100 addis r6, r6, SZ_8M@h addis r8, r8, SZ_8M@h cmplw r6, r9 bdnzt lt, 2b - -4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) +4: 2: ori r0, r6, MD_EVALID + ori r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT mtspr SPRN_MD_CTR, r5 mtspr SPRN_MD_EPN, r0 mtspr SPRN_MD_TWC, r7 - mtspr SPRN_MD_RPN, r8 + mtspr SPRN_MD_RPN, r12 addi r5, r5, 0x100 addis r6, r6, SZ_8M@h addis r8, r8, SZ_8M@h @@ -781,7 +782,7 @@ _GLOBAL(mmu_pin_tlb) #endif #if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA) lis r0, (MD_RSV4I | MD_TWAM)@h - mtspr SPRN_MI_CTR, r0 + mtspr SPRN_MD_CTR, r0 #endif mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 1f07317964e4..618aeccdf691 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -25,8 +25,14 @@ static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src) return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]); } -#define unsafe_get_user_sigset(dst, src, label) \ - unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label) +#define unsafe_get_user_sigset(dst, src, label) do { \ + sigset_t *__dst = dst; \ + const sigset_t __user *__src = src; \ + int i; \ + \ + for (i = 0; i < _NSIG_WORDS; i++) \ + unsafe_get_user(__dst->sig[i], &__src->sig[i], label); \ +} while (0) #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 00a9c9cd6d42..3e053e2fd6b6 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1063,7 +1063,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * We kill the task with a SIGSEGV in this situation. */ if (do_setcontext(new_ctx, regs, 0)) { - force_fatal_sig(SIGSEGV); + force_exit_sig(SIGSEGV); return -EFAULT; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index ef518535d436..d1e1fc0acbea 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -704,7 +704,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, */ if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) { - force_fatal_sig(SIGSEGV); + force_exit_sig(SIGSEGV); return -EFAULT; } set_current_blocked(&set); @@ -713,7 +713,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, return -EFAULT; if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) { user_read_access_end(); - force_fatal_sig(SIGSEGV); + force_exit_sig(SIGSEGV); return -EFAULT; } user_read_access_end(); diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 7bef917cc84e..15109af9d075 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -528,3 +528,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index f9ea0e5357f9..3fa6d240bade 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -187,6 +187,12 @@ static void watchdog_smp_panic(int cpu, u64 tb) if (sysctl_hardlockup_all_cpu_backtrace) trigger_allbutself_cpu_backtrace(); + /* + * Force flush any remote buffers that might be stuck in IRQ context + * and therefore could not run their irq_work. + */ + printk_trigger_flush(); + if (hardlockup_panic) nmi_panic(NULL, "Hard LOCKUP"); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index fcf4760a3a0e..70b7a8f97153 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -695,6 +695,7 @@ static void flush_guest_tlb(struct kvm *kvm) "r" (0) : "memory"); } asm volatile("ptesync": : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); } else { for (set = 0; set < kvm->arch.tlb_sets; ++set) { @@ -705,7 +706,9 @@ static void flush_guest_tlb(struct kvm *kvm) rb += PPC_BIT(51); /* increment set number */ } asm volatile("ptesync": : :"memory"); - asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. + if (cpu_has_feature(CPU_FTR_ARCH_300)) + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); } } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index eb776d0c5d8e..32a4b4d412b9 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2005,7 +2005,7 @@ hcall_real_table: .globl hcall_real_table_end hcall_real_table_end: -_GLOBAL(kvmppc_h_set_xdabr) +_GLOBAL_TOC(kvmppc_h_set_xdabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) andi. r0, r5, DABRX_USER | DABRX_KERNEL beq 6f @@ -2015,7 +2015,7 @@ EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) 6: li r3, H_PARAMETER blr -_GLOBAL(kvmppc_h_set_dabr) +_GLOBAL_TOC(kvmppc_h_set_dabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr) li r5, DABRX_USER | DABRX_KERNEL 3: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 35e9cccdeef9..a72920f4f221 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -641,9 +641,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * implementations just count online CPUs. */ if (hv_enabled) - r = num_present_cpus(); + r = min_t(unsigned int, num_present_cpus(), KVM_MAX_VCPUS); else - r = num_online_cpus(); + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 8fc49b1b4a91..6ec978967da0 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -314,7 +314,7 @@ static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size pr_warn("KASLR: No safe seed for randomizing the kernel base.\n"); ram = min_t(phys_addr_t, __max_low_memory, size); - ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, false); + ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, true); linear_sz = min_t(unsigned long, ram, SZ_512M); /* If the linear size is smaller than 64M, do not randmize */ diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 89353d4f5604..647bf454a0fa 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -645,7 +645,7 @@ static void early_init_this_mmu(void) if (map) linear_map_top = map_mem_in_cams(linear_map_top, - num_cams, true, true); + num_cams, false, true); } #endif @@ -766,7 +766,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; linear_sz = map_mem_in_cams(first_memblock_size, num_cams, - false, true); + true, true); ppc64_rma_size = min_t(u64, linear_sz, 0x40000000); } else diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 6f14c8fb6359..59d3cfcd7887 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -376,9 +376,9 @@ static void initialize_form2_numa_distance_lookup_table(void) { int i, j; struct device_node *root; - const __u8 *numa_dist_table; + const __u8 *form2_distances; const __be32 *numa_lookup_index; - int numa_dist_table_length; + int form2_distances_length; int max_numa_index, distance_index; if (firmware_has_feature(FW_FEATURE_OPAL)) @@ -392,45 +392,41 @@ static void initialize_form2_numa_distance_lookup_table(void) max_numa_index = of_read_number(&numa_lookup_index[0], 1); /* first element of the array is the size and is encode-int */ - numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL); - numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1); + form2_distances = of_get_property(root, "ibm,numa-distance-table", NULL); + form2_distances_length = of_read_number((const __be32 *)&form2_distances[0], 1); /* Skip the size which is encoded int */ - numa_dist_table += sizeof(__be32); + form2_distances += sizeof(__be32); - pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n", - numa_dist_table_length, max_numa_index); + pr_debug("form2_distances_len = %d, numa_dist_indexes_len = %d\n", + form2_distances_length, max_numa_index); for (i = 0; i < max_numa_index; i++) /* +1 skip the max_numa_index in the property */ numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1); - if (numa_dist_table_length != max_numa_index * max_numa_index) { + if (form2_distances_length != max_numa_index * max_numa_index) { WARN(1, "Wrong NUMA distance information\n"); - /* consider everybody else just remote. */ - for (i = 0; i < max_numa_index; i++) { - for (j = 0; j < max_numa_index; j++) { - int nodeA = numa_id_index_table[i]; - int nodeB = numa_id_index_table[j]; - - if (nodeA == nodeB) - numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE; - else - numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE; - } - } + form2_distances = NULL; // don't use it } - distance_index = 0; for (i = 0; i < max_numa_index; i++) { for (j = 0; j < max_numa_index; j++) { int nodeA = numa_id_index_table[i]; int nodeB = numa_id_index_table[j]; - - numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++]; - pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]); + int dist; + + if (form2_distances) + dist = form2_distances[distance_index++]; + else if (nodeA == nodeB) + dist = LOCAL_DISTANCE; + else + dist = REMOTE_DISTANCE; + numa_distance_table[nodeA][nodeB] = dist; + pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, dist); } } + of_node_put(root); } diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index bb789f33c70e..a38372f9ac12 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -186,7 +186,6 @@ err: static int mcu_remove(struct i2c_client *client) { struct mcu *mcu = i2c_get_clientdata(client); - int ret; kthread_stop(shutdown_thread); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 49b401536d29..8f998e55735b 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1094,15 +1094,6 @@ static phys_addr_t ddw_memory_hotplug_max(void) phys_addr_t max_addr = memory_hotplug_max(); struct device_node *memory; - /* - * The "ibm,pmemory" can appear anywhere in the address space. - * Assuming it is still backed by page structs, set the upper limit - * for the huge DMA window as MAX_PHYSMEM_BITS. - */ - if (of_find_node_by_type(NULL, "ibm,pmemory")) - return (sizeof(phys_addr_t) * 8 <= MAX_PHYSMEM_BITS) ? - (phys_addr_t) -1 : (1ULL << MAX_PHYSMEM_BITS); - for_each_node_by_type(memory, "memory") { unsigned long start, size; int n_mem_addr_cells, n_mem_size_cells, len; @@ -1238,7 +1229,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct dma_win *window; struct property *win64; - bool ddw_enabled = false; struct failed_ddw_pdn *fpdn; bool default_win_removed = false, direct_mapping = false; bool pmem_present; @@ -1253,7 +1243,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) { direct_mapping = (len >= max_ram_len); - ddw_enabled = true; goto out_unlock; } @@ -1367,8 +1356,10 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) len = order_base_2(query.largest_available_block << page_shift); win_name = DMA64_PROPNAME; } else { - direct_mapping = true; - win_name = DIRECT64_PROPNAME; + direct_mapping = !default_win_removed || + (len == MAX_PHYSMEM_BITS) || + (!pmem_present && (len == max_ram_len)); + win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME; } ret = create_ddw(dev, ddw_avail, &create, page_shift, len); @@ -1406,8 +1397,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n", dn, ret); - /* Make sure to clean DDW if any TCE was set*/ - clean_dma_window(pdn, win64->value); + /* Make sure to clean DDW if any TCE was set*/ + clean_dma_window(pdn, win64->value); goto out_del_list; } } else { @@ -1454,7 +1445,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) spin_unlock(&dma_win_list_lock); dev->dev.archdata.dma_offset = win_addr; - ddw_enabled = true; goto out_unlock; out_del_list: @@ -1490,10 +1480,10 @@ out_unlock: * as RAM, then we failed to create a window to cover persistent * memory and need to set the DMA limit. */ - if (pmem_present && ddw_enabled && direct_mapping && len == max_ram_len) + if (pmem_present && direct_mapping && len == max_ram_len) dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len); - return ddw_enabled && direct_mapping; + return direct_mapping; } static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig index 97796c6b63f0..785c292d104b 100644 --- a/arch/powerpc/sysdev/xive/Kconfig +++ b/arch/powerpc/sysdev/xive/Kconfig @@ -3,7 +3,6 @@ config PPC_XIVE bool select PPC_SMP_MUXED_IPI select HARDIRQS_SW_RESEND - select IRQ_DOMAIN_NOMAP config PPC_XIVE_NATIVE bool diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index c5d75c02ad8b..7b69299c2912 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1443,8 +1443,7 @@ static const struct irq_domain_ops xive_irq_domain_ops = { static void __init xive_init_host(struct device_node *np) { - xive_irq_domain = irq_domain_add_nomap(np, XIVE_MAX_IRQ, - &xive_irq_domain_ops, NULL); + xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL); if (WARN_ON(xive_irq_domain == NULL)) return; irq_set_default_host(xive_irq_domain); diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 5927c94302b8..8a107ed18b0d 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -107,11 +107,13 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ +ifeq ($(KBUILD_EXTMOD),) ifeq ($(CONFIG_MMU),y) prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h endif +endif ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index c252fd5706d2..ef473e2f503b 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -19,6 +19,8 @@ CONFIG_SOC_VIRT=y CONFIG_SOC_MICROCHIP_POLARFIRE=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y +CONFIG_VIRTUALIZATION=y +CONFIG_KVM=m CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 434ef5b64599..6e9f12ff968a 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -19,6 +19,8 @@ CONFIG_SOC_VIRT=y CONFIG_ARCH_RV32I=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y +CONFIG_VIRTUALIZATION=y +CONFIG_KVM=m CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 25ba21f98504..2639b9ee48f9 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -12,14 +12,12 @@ #include <linux/types.h> #include <linux/kvm.h> #include <linux/kvm_types.h> +#include <asm/csr.h> #include <asm/kvm_vcpu_fp.h> #include <asm/kvm_vcpu_timer.h> -#ifdef CONFIG_64BIT -#define KVM_MAX_VCPUS (1U << 16) -#else -#define KVM_MAX_VCPUS (1U << 9) -#endif +#define KVM_MAX_VCPUS \ + ((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) + 1) #define KVM_HALT_POLL_NS_DEFAULT 500000 diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index d81bae8eb55e..fc058ff5f4b6 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -453,6 +453,12 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { + gpa_t gpa = slot->base_gfn << PAGE_SHIFT; + phys_addr_t size = slot->npages << PAGE_SHIFT; + + spin_lock(&kvm->mmu_lock); + stage2_unmap_range(kvm, gpa, size, false); + spin_unlock(&kvm->mmu_lock); } void kvm_arch_commit_memory_region(struct kvm *kvm, diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index e3d3aed46184..fb84619df012 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -740,7 +740,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. * See the comment in kvm_vcpu_exiting_guest_mode() and - * Documentation/virtual/kvm/vcpu-requests.rst + * Documentation/virt/kvm/vcpu-requests.rst */ vcpu->mode = IN_GUEST_MODE; diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index eb3c045edf11..3b0e703d22cf 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/** +/* * Copyright (c) 2019 Western Digital Corporation or its affiliates. * * Authors: diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 26399df15b63..fb18af34a4b5 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -74,7 +74,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_NR_VCPUS: - r = num_online_cpus(); + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f614562d74f0..f6a9475cbc8c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -47,7 +47,7 @@ config ARCH_SUPPORTS_UPROBES config KASAN_SHADOW_OFFSET hex depends on KASAN - default 0x18000000000000 + default 0x1C000000000000 config S390 def_bool y @@ -193,6 +193,7 @@ config S390 select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ select HAVE_SAMPLE_FTRACE_DIRECT + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 69c45f600273..609e3697324b 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -77,10 +77,12 @@ KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),) -cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE) -ifeq ($(call cc-option,-mstack-size=8192),) -cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) -endif + CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE) + ifeq ($(call cc-option,-mstack-size=8192),) + CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD) + endif + export CC_FLAGS_CHECK_STACK + cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK) endif ifdef CONFIG_EXPOLINE diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 7571dee72a0c..1aa11a8f57dd 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -149,82 +149,56 @@ static void setup_ident_map_size(unsigned long max_physmem_end) static void setup_kernel_memory_layout(void) { - bool vmalloc_size_verified = false; - unsigned long vmemmap_off; - unsigned long vspace_left; + unsigned long vmemmap_start; unsigned long rte_size; unsigned long pages; - unsigned long vmax; pages = ident_map_size / PAGE_SIZE; /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); /* choose kernel address space layout: 4 or 3 levels. */ - vmemmap_off = round_up(ident_map_size, _REGION3_SIZE); + vmemmap_start = round_up(ident_map_size, _REGION3_SIZE); if (IS_ENABLED(CONFIG_KASAN) || vmalloc_size > _REGION2_SIZE || - vmemmap_off + vmemmap_size + vmalloc_size + MODULES_LEN > _REGION2_SIZE) - vmax = _REGION1_SIZE; - else - vmax = _REGION2_SIZE; - - /* keep vmemmap_off aligned to a top level region table entry */ - rte_size = vmax == _REGION1_SIZE ? _REGION2_SIZE : _REGION3_SIZE; - MODULES_END = vmax; - if (is_prot_virt_host()) { - /* - * forcing modules and vmalloc area under the ultravisor - * secure storage limit, so that any vmalloc allocation - * we do could be used to back secure guest storage. - */ - adjust_to_uv_max(&MODULES_END); - } - -#ifdef CONFIG_KASAN - if (MODULES_END < vmax) { - /* force vmalloc and modules below kasan shadow */ - MODULES_END = min(MODULES_END, KASAN_SHADOW_START); + vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN > + _REGION2_SIZE) { + MODULES_END = _REGION1_SIZE; + rte_size = _REGION2_SIZE; } else { - /* - * leave vmalloc and modules above kasan shadow but make - * sure they don't overlap with it - */ - vmalloc_size = min(vmalloc_size, vmax - KASAN_SHADOW_END - MODULES_LEN); - vmalloc_size_verified = true; - vspace_left = KASAN_SHADOW_START; + MODULES_END = _REGION2_SIZE; + rte_size = _REGION3_SIZE; } + /* + * forcing modules and vmalloc area under the ultravisor + * secure storage limit, so that any vmalloc allocation + * we do could be used to back secure guest storage. + */ + adjust_to_uv_max(&MODULES_END); +#ifdef CONFIG_KASAN + /* force vmalloc and modules below kasan shadow */ + MODULES_END = min(MODULES_END, KASAN_SHADOW_START); #endif MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; - if (vmalloc_size_verified) { - VMALLOC_START = VMALLOC_END - vmalloc_size; - } else { - vmemmap_off = round_up(ident_map_size, rte_size); - - if (vmemmap_off + vmemmap_size > VMALLOC_END || - vmalloc_size > VMALLOC_END - vmemmap_off - vmemmap_size) { - /* - * allow vmalloc area to occupy up to 1/2 of - * the rest virtual space left. - */ - vmalloc_size = min(vmalloc_size, VMALLOC_END / 2); - } - VMALLOC_START = VMALLOC_END - vmalloc_size; - vspace_left = VMALLOC_START; - } + /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */ + vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE)); + VMALLOC_START = VMALLOC_END - vmalloc_size; - pages = vspace_left / (PAGE_SIZE + sizeof(struct page)); + /* split remaining virtual space between 1:1 mapping & vmemmap array */ + pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); pages = SECTION_ALIGN_UP(pages); - vmemmap_off = round_up(vspace_left - pages * sizeof(struct page), rte_size); - /* keep vmemmap left most starting from a fresh region table entry */ - vmemmap_off = min(vmemmap_off, round_up(ident_map_size, rte_size)); - /* take care that identity map is lower then vmemmap */ - ident_map_size = min(ident_map_size, vmemmap_off); + /* keep vmemmap_start aligned to a top level region table entry */ + vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size); + /* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */ + vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS); + /* make sure identity map doesn't overlay with vmemmap */ + ident_map_size = min(ident_map_size, vmemmap_start); vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); - VMALLOC_START = max(vmemmap_off + vmemmap_size, VMALLOC_START); - vmemmap = (struct page *)vmemmap_off; + /* make sure vmemmap doesn't overlay with vmalloc area */ + VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START); + vmemmap = (struct page *)vmemmap_start; } /* diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index fd825097cf04..b626bc6e0eaf 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -403,7 +403,6 @@ CONFIG_DEVTMPFS=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y @@ -476,6 +475,7 @@ CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_VXLAN=m CONFIG_BAREUDP=m +CONFIG_AMT=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m @@ -489,6 +489,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ASIX is not set # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_BROCADE is not set @@ -571,6 +572,7 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +# CONFIG_DRM_DEBUG_MODESET_LOCK is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y @@ -775,12 +777,14 @@ CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m CONFIG_RANDOM32_SELFTEST=y +CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_HEADERS_INSTALL=y CONFIG_DEBUG_SECTION_MISMATCH=y @@ -807,6 +811,7 @@ CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_DEBUG_PER_CPU_MAPS=y CONFIG_KFENCE=y +CONFIG_KFENCE_STATIC_KEYS=y CONFIG_DEBUG_SHIRQ=y CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y @@ -842,6 +847,7 @@ CONFIG_FTRACE_STARTUP_TEST=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_SAMPLE_FTRACE_DIRECT=m +CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m CONFIG_DEBUG_ENTRY=y CONFIG_CIO_INJECT=y CONFIG_KUNIT=m @@ -860,7 +866,7 @@ CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_MIN_HEAP=y -CONFIG_KPROBES_SANITY_TEST=y +CONFIG_KPROBES_SANITY_TEST=m CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index c9c3cedff2d8..0056cab27372 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -394,7 +394,6 @@ CONFIG_DEVTMPFS=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y @@ -467,6 +466,7 @@ CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_VXLAN=m CONFIG_BAREUDP=m +CONFIG_AMT=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m @@ -480,6 +480,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ASIX is not set # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_BROCADE is not set @@ -762,12 +763,14 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m +CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y @@ -792,9 +795,11 @@ CONFIG_HIST_TRIGGERS=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_SAMPLE_FTRACE_DIRECT=m +CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m CONFIG_KUNIT=m CONFIG_KUNIT_DEBUGFS=y CONFIG_LKDTM=m +CONFIG_KPROBES_SANITY_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index aceccf3b9a88..eed3b9acfa71 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -65,9 +65,11 @@ CONFIG_ZFCP=y # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" # CONFIG_ZLIB_DFLTCC is not set +CONFIG_XZ_DEC_MICROLZMA=y CONFIG_PRINTK_TIME=y # CONFIG_SYMBOLIC_ERRNAME is not set CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_PANIC_ON_OOPS=y diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index ea398a05f643..7f3c9ac34bd8 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -74,6 +74,12 @@ void *kexec_file_add_components(struct kimage *image, int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, unsigned long addr); +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + void *ipl_buf; +}; + extern const struct kexec_file_ops s390_kexec_image_ops; extern const struct kexec_file_ops s390_kexec_elf_ops; diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index e4dc64cc9c55..287bb88f7698 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -14,12 +14,13 @@ /* I/O Map */ #define ZPCI_IOMAP_SHIFT 48 -#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL +#define ZPCI_IOMAP_ADDR_SHIFT 62 +#define ZPCI_IOMAP_ADDR_BASE (1UL << ZPCI_IOMAP_ADDR_SHIFT) #define ZPCI_IOMAP_ADDR_OFF_MASK ((1UL << ZPCI_IOMAP_SHIFT) - 1) #define ZPCI_IOMAP_MAX_ENTRIES \ - ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT)) + (1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT)) #define ZPCI_IOMAP_ADDR_IDX_MASK \ - (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE) + ((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK) struct zpci_iomap_entry { u32 fh; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index d72a6df058d7..785d54c9350c 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -191,8 +191,8 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (oldmem_data.start && from - oldmem_data.size < oldmem_data.size) { - from -= oldmem_data.size; + if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) { + from -= oldmem_data.start; len = min(count, oldmem_data.size - from); } else if (oldmem_data.start && from < oldmem_data.size) { len = min(count, oldmem_data.size - from); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index e2cc35775b99..5ad1dde23dc5 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2156,7 +2156,7 @@ void *ipl_report_finish(struct ipl_report *report) buf = vzalloc(report->size); if (!buf) - return ERR_PTR(-ENOMEM); + goto out; ptr = buf; memcpy(ptr, report->ipib, report->ipib->hdr.len); @@ -2195,6 +2195,7 @@ void *ipl_report_finish(struct ipl_report *report) } BUG_ON(ptr > buf + report->size); +out: return buf; } diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 528edff085d9..9975ad200d74 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -12,6 +12,7 @@ #include <linux/kexec.h> #include <linux/module_signature.h> #include <linux/verification.h> +#include <linux/vmalloc.h> #include <asm/boot_data.h> #include <asm/ipl.h> #include <asm/setup.h> @@ -170,6 +171,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, struct kexec_buf buf; unsigned long addr; void *ptr, *end; + int ret; buf.image = image; @@ -199,9 +201,13 @@ static int kexec_file_add_ipl_report(struct kimage *image, ptr += len; } + ret = -ENOMEM; buf.buffer = ipl_report_finish(data->report); + if (!buf.buffer) + goto out; buf.bufsz = data->report->size; buf.memsz = buf.bufsz; + image->arch.ipl_buf = buf.buffer; data->memsz += buf.memsz; @@ -209,7 +215,9 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); *lc_ipl_parmblock_ptr = (__u32)buf.mem; - return kexec_add_buffer(&buf); + ret = kexec_add_buffer(&buf); +out: + return ret; } void *kexec_file_add_components(struct kimage *image, @@ -322,3 +330,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, } return 0; } + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->arch.ipl_buf); + image->arch.ipl_buf = NULL; + + return kexec_image_post_load_cleanup_default(image); +} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 40405f2304f1..225ab2d0a4c6 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -606,7 +606,7 @@ static void __init setup_resources(void) static void __init setup_memory_end(void) { - memblock_remove(ident_map_size, ULONG_MAX); + memblock_remove(ident_map_size, PHYS_ADDR_MAX - ident_map_size); max_pfn = max_low_pfn = PFN_DOWN(ident_map_size); pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20); } @@ -638,14 +638,6 @@ static struct notifier_block kdump_mem_nb = { #endif /* - * Make sure that the area above identity mapping is protected - */ -static void __init reserve_above_ident_map(void) -{ - memblock_reserve(ident_map_size, ULONG_MAX); -} - -/* * Reserve memory for kdump kernel to be loaded with kexec */ static void __init reserve_crashkernel(void) @@ -785,7 +777,6 @@ static void __init memblock_add_mem_detect_info(void) } memblock_set_bottom_up(false); memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); - memblock_dump_all(); } /* @@ -826,9 +817,6 @@ static void __init setup_memory(void) storage_key_init_range(start, end); psw_set_key(PAGE_DEFAULT_KEY); - - /* Only cosmetics */ - memblock_enforce_memory_limit(memblock_end_of_DRAM()); } static void __init relocate_amode31_section(void) @@ -999,24 +987,24 @@ void __init setup_arch(char **cmdline_p) setup_control_program_code(); /* Do some memory reservations *before* memory is added to memblock */ - reserve_above_ident_map(); reserve_kernel(); reserve_initrd(); reserve_certificate_list(); reserve_mem_detect_info(); + memblock_set_current_limit(ident_map_size); memblock_allow_resize(); /* Get information about *all* installed memory */ memblock_add_mem_detect_info(); free_mem_detect_info(); + setup_memory_end(); + memblock_dump_all(); + setup_memory(); relocate_amode31_section(); setup_cr(); - setup_uv(); - setup_memory_end(); - setup_memory(); dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); if (MACHINE_HAS_EDAT2) diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index df5261e5cfe1..ed9c5c2eafad 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv sys_futex_waitv diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 035705c9f23e..2b780786fc68 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -84,7 +84,7 @@ static void default_trap_handler(struct pt_regs *regs) { if (user_mode(regs)) { report_user_fault(regs, SIGSEGV, 0); - force_fatal_sig(SIGSEGV); + force_exit_sig(SIGSEGV); } else die(regs, "Unknown program exception"); } diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index e3e6ac5686df..245bddfe9bc0 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -22,7 +22,7 @@ KBUILD_AFLAGS_32 += -m31 -s KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -LDFLAGS_vdso32.so.dbg += -fPIC -shared -nostdlib -soname=linux-vdso32.so.1 \ +LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \ --hash-style=both --build-id=sha1 -melf_s390 -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 6568de236701..9e2b95a222a9 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -8,8 +8,9 @@ ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT include $(srctree)/lib/vdso/Makefile obj-vdso64 = vdso_user_wrapper.o note.o obj-cvdso64 = vdso64_generic.o getcpu.o -CFLAGS_REMOVE_getcpu.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) -CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK) +CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) +CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) # Build rules @@ -25,7 +26,7 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin -ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \ +ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \ --hash-style=both --build-id=sha1 -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c6257f625929..14a18ba5ff2c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -585,6 +585,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_MAX_VCPUS; else if (sclp.has_esca && sclp.has_64bscao) r = KVM_S390_ESCA_CPU_SLOTS; + if (ext == KVM_CAP_NR_VCPUS) + r = min_t(unsigned int, num_online_cpus(), r); break; case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index cfc5f5557c06..bc7973359ae2 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -173,10 +173,11 @@ static noinline int unwindme_func4(struct unwindme *u) } /* - * trigger specification exception + * Trigger operation exception; use insn notation to bypass + * llvm's integrated assembler sanity checks. */ asm volatile( - " mvcl %%r1,%%r1\n" + " .insn e,0x0000\n" /* illegal opcode */ "0: nopr %%r7\n" EX_TABLE(0b, 0b) :); diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index c7a97f32432f..481a664287e2 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -43,7 +43,6 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); -void flush_dcache_folio(struct folio *folio); extern void flush_icache_range(unsigned long start, unsigned long end); #define flush_icache_user_range flush_icache_range extern void flush_icache_page(struct vm_area_struct *vma, diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 208f131659c5..d9539d28bdaa 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index cd677bc564a7..ffab16369bea 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -244,7 +244,7 @@ static int setup_frame(struct ksignal *ksig, struct pt_regs *regs, get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - force_fatal_sig(SIGILL); + force_exit_sig(SIGILL); return -EINVAL; } @@ -336,7 +336,7 @@ static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, sf = (struct rt_signal_frame __user *) get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - force_fatal_sig(SIGILL); + force_exit_sig(SIGILL); return -EINVAL; } diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index c37764dc764d..46adabcb1720 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -494,3 +494,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c index bbbd40cc6b28..8f20862ccc83 100644 --- a/arch/sparc/kernel/windows.c +++ b/arch/sparc/kernel/windows.c @@ -122,7 +122,7 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who) if ((sp & 7) || copy_to_user((char __user *) sp, &tp->reg_window[window], sizeof(struct reg_window32))) { - force_fatal_sig(SIGILL); + force_exit_sig(SIGILL); return; } } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 95dd1ee01546..5c2ccb85f2ef 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -193,7 +193,7 @@ config X86 select HAVE_DYNAMIC_FTRACE_WITH_ARGS if X86_64 select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_SAMPLE_FTRACE_DIRECT if X86_64 - select HAVE_SAMPLE_FTRACE_MULTI_DIRECT if X86_64 + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if X86_64 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_EISA @@ -1932,6 +1932,7 @@ config EFI depends on ACPI select UCS2_STRING select EFI_RUNTIME_WRAPPERS + select ARCH_USE_MEMREMAP_PROT help This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e38a4cf795d9..97b1f84bb53f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -574,6 +574,10 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) ud2 1: #endif +#ifdef CONFIG_XEN_PV + ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV +#endif + POP_REGS pop_rdi=0 /* @@ -890,6 +894,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) .Lparanoid_entry_checkgs: /* EBX = 1 -> kernel GSBASE active, no restore required */ movl $1, %ebx + /* * The kernel-enforced convention is a negative GSBASE indicates * a kernel value. No SWAPGS needed on entry and exit. @@ -897,21 +902,14 @@ SYM_CODE_START_LOCAL(paranoid_entry) movl $MSR_GS_BASE, %ecx rdmsr testl %edx, %edx - jns .Lparanoid_entry_swapgs - ret + js .Lparanoid_kernel_gsbase -.Lparanoid_entry_swapgs: + /* EBX = 0 -> SWAPGS required on exit */ + xorl %ebx, %ebx swapgs +.Lparanoid_kernel_gsbase: - /* - * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an - * unconditional CR3 write, even in the PTI case. So do an lfence - * to prevent GS speculation, regardless of whether PTI is enabled. - */ FENCE_SWAPGS_KERNEL_ENTRY - - /* EBX = 0 -> SWAPGS required on exit */ - xorl %ebx, %ebx ret SYM_CODE_END(paranoid_entry) @@ -993,11 +991,6 @@ SYM_CODE_START_LOCAL(error_entry) pushq %r12 ret -.Lerror_entry_done_lfence: - FENCE_SWAPGS_KERNEL_ENTRY -.Lerror_entry_done: - ret - /* * There are two places in the kernel that can potentially fault with * usergs. Handle them here. B stepping K8s sometimes report a @@ -1020,8 +1013,14 @@ SYM_CODE_START_LOCAL(error_entry) * .Lgs_change's error handler with kernel gsbase. */ SWAPGS - FENCE_SWAPGS_USER_ENTRY - jmp .Lerror_entry_done + + /* + * Issue an LFENCE to prevent GS speculation, regardless of whether it is a + * kernel or user gsbase. + */ +.Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY + ret .Lbstep_iret: /* Fix truncated RIP */ diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 0b6b277ee050..fd2ee9408e91 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -226,7 +226,7 @@ bool emulate_vsyscall(unsigned long error_code, if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); - force_fatal_sig(SIGSYS); + force_exit_sig(SIGSYS); return true; } regs->orig_ax = -1; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 42cf01ecdd13..ec6444f2c9dc 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2211,7 +2211,6 @@ intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int /* must not have branches... */ local_irq_save(flags); __intel_pmu_disable_all(false); /* we don't care about BTS */ - __intel_pmu_pebs_disable_all(); __intel_pmu_lbr_disable(); /* ... until here */ return __intel_pmu_snapshot_branch_stack(entries, cnt, flags); @@ -2225,7 +2224,6 @@ intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned /* must not have branches... */ local_irq_save(flags); __intel_pmu_disable_all(false); /* we don't care about BTS */ - __intel_pmu_pebs_disable_all(); __intel_pmu_arch_lbr_disable(); /* ... until here */ return __intel_pmu_snapshot_branch_stack(entries, cnt, flags); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index eb2c6cea9d0d..3660f698fb2a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3608,6 +3608,9 @@ static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; struct extra_reg *er; int idx = 0; + /* Any of the CHA events may be filtered by Thread/Core-ID.*/ + if (event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN) + idx = SKX_CHA_MSR_PMON_BOX_FILTER_TID; for (er = skx_uncore_cha_extra_regs; er->msr; er++) { if (er->event != (event->hw.config & er->config_mask)) @@ -3675,6 +3678,7 @@ static struct event_constraint skx_uncore_iio_constraints[] = { UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), UNCORE_EVENT_CONSTRAINT(0xd4, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), EVENT_CONSTRAINT_END }; @@ -4525,6 +4529,13 @@ static void snr_iio_cleanup_mapping(struct intel_uncore_type *type) pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group); } +static struct event_constraint snr_uncore_iio_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), + EVENT_CONSTRAINT_END +}; + static struct intel_uncore_type snr_uncore_iio = { .name = "iio", .num_counters = 4, @@ -4536,6 +4547,7 @@ static struct intel_uncore_type snr_uncore_iio = { .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL, .msr_offset = SNR_IIO_MSR_OFFSET, + .constraints = snr_uncore_iio_constraints, .ops = &ivbep_uncore_msr_ops, .format_group = &snr_uncore_iio_format_group, .attr_update = snr_iio_attr_update, diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 24f4a06ac46a..96eb7db31c8e 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -177,6 +177,9 @@ void set_hv_tscchange_cb(void (*cb)(void)) return; } + if (!hv_vp_index) + return; + hv_reenlightenment_cb = cb; /* Make sure callback is registered before we write to MSRs */ @@ -383,20 +386,13 @@ static void __init hv_get_partition_id(void) */ void __init hyperv_init(void) { - u64 guest_id, required_msrs; + u64 guest_id; union hv_x64_msr_hypercall_contents hypercall_msr; int cpuhp; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; - /* Absolutely required MSRs */ - required_msrs = HV_MSR_HYPERCALL_AVAILABLE | - HV_MSR_VP_INDEX_AVAILABLE; - - if ((ms_hyperv.features & required_msrs) != required_msrs) - return; - if (hv_common_init()) return; diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 6053674f9132..c2767a6a387e 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -102,12 +102,6 @@ extern void switch_fpu_return(void); */ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); -/* - * Tasks that are not using SVA have mm->pasid set to zero to note that they - * will not have the valid bit set in MSR_IA32_PASID while they are running. - */ -#define PASID_DISABLED 0 - /* Trap handling */ extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern void fpu_sync_fpstate(struct fpu *fpu); diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 5a0bcf8b78d7..048b6d5aff50 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -108,7 +108,7 @@ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ -#define INTEL_FAM6_RAPTOR_LAKE 0xB7 +#define INTEL_FAM6_RAPTORLAKE 0xB7 /* "Small Core" Processors (Atom) */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e5d8700319cc..2164b9f4c7b0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -97,7 +97,7 @@ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) #define KVM_REQ_TLB_FLUSH_GUEST \ - KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP) + KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_APF_READY KVM_ARCH_REQ(28) #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ @@ -363,6 +363,7 @@ union kvm_mmu_extended_role { unsigned int cr4_smap:1; unsigned int cr4_smep:1; unsigned int cr4_la57:1; + unsigned int efer_lma:1; }; }; @@ -1035,6 +1036,7 @@ struct kvm_x86_msr_filter { #define APICV_INHIBIT_REASON_PIT_REINJ 4 #define APICV_INHIBIT_REASON_X2APIC 5 #define APICV_INHIBIT_REASON_BLOCKIRQ 6 +#define APICV_INHIBIT_REASON_ABSENT 7 struct kvm_arch { unsigned long n_used_mmu_pages; diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 2cef6c5a52c2..6acaf5af0a3d 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -73,4 +73,15 @@ #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) +/* + * Error codes related to GHCB input that can be communicated back to the guest + * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2. + */ +#define GHCB_ERR_NOT_REGISTERED 1 +#define GHCB_ERR_INVALID_USAGE 2 +#define GHCB_ERR_INVALID_SCRATCH_AREA 3 +#define GHCB_ERR_MISSING_INPUT 4 +#define GHCB_ERR_INVALID_INPUT 5 +#define GHCB_ERR_INVALID_EVENT 6 + #endif diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 0575f5863b7f..e5e0fe10c692 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -281,13 +281,13 @@ HYPERVISOR_callback_op(int cmd, void *arg) return _hypercall2(int, callback_op, cmd, arg); } -static inline int +static __always_inline int HYPERVISOR_set_debugreg(int reg, unsigned long value) { return _hypercall2(int, set_debugreg, reg, value); } -static inline unsigned long +static __always_inline unsigned long HYPERVISOR_get_debugreg(int reg) { return _hypercall1(unsigned long, get_debugreg, reg); diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 4957f59deb40..5adab895127e 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -64,6 +64,7 @@ void xen_arch_unregister_cpu(int num); #ifdef CONFIG_PVH void __init xen_pvh_init(struct boot_params *boot_params); +void __init mem_map_via_hcall(struct boot_params *boot_params_p); #endif #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 4794b716ec79..ff55df60228f 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -163,12 +163,22 @@ static uint32_t __init ms_hyperv_platform(void) cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); - if (eax >= HYPERV_CPUID_MIN && - eax <= HYPERV_CPUID_MAX && - !memcmp("Microsoft Hv", hyp_signature, 12)) - return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS; + if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX || + memcmp("Microsoft Hv", hyp_signature, 12)) + return 0; - return 0; + /* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */ + eax = cpuid_eax(HYPERV_CPUID_FEATURES); + if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) { + pr_warn("x86/hyperv: HYPERCALL MSR not available.\n"); + return 0; + } + if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) { + pr_warn("x86/hyperv: VP_INDEX MSR not available.\n"); + return 0; + } + + return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS; } static unsigned char hv_get_nmi_reason(void) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 63d3de02bbcc..8471a8b9b48e 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -28,8 +28,7 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq); static LIST_HEAD(sgx_active_page_list); static DEFINE_SPINLOCK(sgx_reclaimer_lock); -/* The free page list lock protected variables prepend the lock. */ -static unsigned long sgx_nr_free_pages; +static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0); /* Nodes with one or more EPC sections. */ static nodemask_t sgx_numa_mask; @@ -403,14 +402,15 @@ skip: spin_lock(&node->lock); list_add_tail(&epc_page->list, &node->free_page_list); - sgx_nr_free_pages++; spin_unlock(&node->lock); + atomic_long_inc(&sgx_nr_free_pages); } } static bool sgx_should_reclaim(unsigned long watermark) { - return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list); + return atomic_long_read(&sgx_nr_free_pages) < watermark && + !list_empty(&sgx_active_page_list); } static int ksgxd(void *p) @@ -471,9 +471,9 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list); list_del_init(&page->list); - sgx_nr_free_pages--; spin_unlock(&node->lock); + atomic_long_dec(&sgx_nr_free_pages); return page; } @@ -625,9 +625,9 @@ void sgx_free_epc_page(struct sgx_epc_page *page) spin_lock(&node->lock); list_add_tail(&page->list, &node->free_page_list); - sgx_nr_free_pages++; spin_unlock(&node->lock); + atomic_long_inc(&sgx_nr_free_pages); } static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index d5958278eba6..91d4b6de58ab 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -118,7 +118,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, struct fpstate *fpstate) { struct xregs_state __user *x = buf; - struct _fpx_sw_bytes sw_bytes; + struct _fpx_sw_bytes sw_bytes = {}; u32 xfeatures; int err; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e9ee8b526319..04143a653a8a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -964,6 +964,9 @@ unsigned long __get_wchan(struct task_struct *p) struct unwind_state state; unsigned long addr = 0; + if (!try_get_task_stack(p)) + return 0; + for (unwind_start(&state, p, NULL, NULL); !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); @@ -974,6 +977,8 @@ unsigned long __get_wchan(struct task_struct *p) break; } + put_task_stack(p); + return addr; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 49b596db5631..6a190c7f4d71 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -742,6 +742,28 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } +static char * __init prepare_command_line(void) +{ +#ifdef CONFIG_CMDLINE_BOOL +#ifdef CONFIG_CMDLINE_OVERRIDE + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append boot loader cmdline to builtin */ + strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); + strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif +#endif + + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + + parse_early_param(); + + return command_line; +} + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -831,6 +853,23 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); /* + * x86_configure_nx() is called before parse_early_param() (called by + * prepare_command_line()) to detect whether hardware doesn't support + * NX (so that the early EHCI debug console setup can safely call + * set_fixmap()). It may then be called again from within noexec_setup() + * during parsing early parameters to honor the respective command line + * option. + */ + x86_configure_nx(); + + /* + * This parses early params and it needs to run before + * early_reserve_memory() because latter relies on such settings + * supplied as early params. + */ + *cmdline_p = prepare_command_line(); + + /* * Do some memory reservations *before* memory is added to memblock, so * memblock allocations won't overwrite it. * @@ -863,33 +902,6 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = __pa_symbol(__bss_start); bss_resource.end = __pa_symbol(__bss_stop)-1; -#ifdef CONFIG_CMDLINE_BOOL -#ifdef CONFIG_CMDLINE_OVERRIDE - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); -#else - if (builtin_cmdline[0]) { - /* append boot loader cmdline to builtin */ - strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); - strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); - } -#endif -#endif - - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - *cmdline_p = command_line; - - /* - * x86_configure_nx() is called before parse_early_param() to detect - * whether hardware doesn't support NX (so that the early EHCI debug - * console setup can safely call set_fixmap()). It may then be called - * again from within noexec_setup() during parsing early parameters - * to honor the respective command line option. - */ - x86_configure_nx(); - - parse_early_param(); - #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 74f0ec955384..a9fc2ac7a8bd 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -294,11 +294,6 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, char *dst, char *buf, size_t size) { unsigned long error_code = X86_PF_PROT | X86_PF_WRITE; - char __user *target = (char __user *)dst; - u64 d8; - u32 d4; - u16 d2; - u8 d1; /* * This function uses __put_user() independent of whether kernel or user @@ -320,26 +315,42 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, * instructions here would cause infinite nesting. */ switch (size) { - case 1: + case 1: { + u8 d1; + u8 __user *target = (u8 __user *)dst; + memcpy(&d1, buf, 1); if (__put_user(d1, target)) goto fault; break; - case 2: + } + case 2: { + u16 d2; + u16 __user *target = (u16 __user *)dst; + memcpy(&d2, buf, 2); if (__put_user(d2, target)) goto fault; break; - case 4: + } + case 4: { + u32 d4; + u32 __user *target = (u32 __user *)dst; + memcpy(&d4, buf, 4); if (__put_user(d4, target)) goto fault; break; - case 8: + } + case 8: { + u64 d8; + u64 __user *target = (u64 __user *)dst; + memcpy(&d8, buf, 8); if (__put_user(d8, target)) goto fault; break; + } default: WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); return ES_UNSUPPORTED; @@ -362,11 +373,6 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, char *src, char *buf, size_t size) { unsigned long error_code = X86_PF_PROT; - char __user *s = (char __user *)src; - u64 d8; - u32 d4; - u16 d2; - u8 d1; /* * This function uses __get_user() independent of whether kernel or user @@ -388,26 +394,41 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, * instructions here would cause infinite nesting. */ switch (size) { - case 1: + case 1: { + u8 d1; + u8 __user *s = (u8 __user *)src; + if (__get_user(d1, s)) goto fault; memcpy(buf, &d1, 1); break; - case 2: + } + case 2: { + u16 d2; + u16 __user *s = (u16 __user *)src; + if (__get_user(d2, s)) goto fault; memcpy(buf, &d2, 2); break; - case 4: + } + case 4: { + u32 d4; + u32 __user *s = (u32 __user *)src; + if (__get_user(d4, s)) goto fault; memcpy(buf, &d4, 4); break; - case 8: + } + case 8: { + u64 d8; + u64 __user *s = (u64 __user *)src; if (__get_user(d8, s)) goto fault; memcpy(buf, &d8, 8); break; + } default: WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); return ES_UNSUPPORTED; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ac2909f0cab3..617012f4619f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -579,6 +579,17 @@ static struct sched_domain_topology_level x86_numa_in_package_topology[] = { { NULL, }, }; +static struct sched_domain_topology_level x86_hybrid_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, +#endif +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, +#endif + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + static struct sched_domain_topology_level x86_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, @@ -1469,8 +1480,11 @@ void __init native_smp_cpus_done(unsigned int max_cpus) calculate_max_logical_packages(); + /* XXX for now assume numa-in-package and hybrid don't overlap */ if (x86_has_numa_in_package) set_sched_topology(x86_numa_in_package_topology); + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + set_sched_topology(x86_hybrid_topology); nmi_selftest(); impress_friends(); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2e076a459a0c..a698196377be 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1180,6 +1180,12 @@ void mark_tsc_unstable(char *reason) EXPORT_SYMBOL_GPL(mark_tsc_unstable); +static void __init tsc_disable_clocksource_watchdog(void) +{ + clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; +} + static void __init check_system_tsc_reliable(void) { #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) @@ -1196,6 +1202,23 @@ static void __init check_system_tsc_reliable(void) #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; + + /* + * Disable the clocksource watchdog when the system has: + * - TSC running at constant frequency + * - TSC which does not stop in C-States + * - the TSC_ADJUST register which allows to detect even minimal + * modifications + * - not more than two sockets. As the number of sockets cannot be + * evaluated at the early boot stage where this has to be + * invoked, check the number of online memory nodes as a + * fallback solution which is an reasonable estimate. + */ + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && + boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && + boot_cpu_has(X86_FEATURE_TSC_ADJUST) && + nr_online_nodes <= 2) + tsc_disable_clocksource_watchdog(); } /* @@ -1387,9 +1410,6 @@ static int __init init_tsc_clocksource(void) if (tsc_unstable) goto unreg; - if (tsc_clocksource_reliable || no_tsc_watchdog) - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; - if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; @@ -1527,7 +1547,7 @@ void __init tsc_init(void) } if (tsc_clocksource_reliable || no_tsc_watchdog) - clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + tsc_disable_clocksource_watchdog(); clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 50a4515fe0ad..9452dc9664b5 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -30,6 +30,7 @@ struct tsc_adjust { }; static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust); +static struct timer_list tsc_sync_check_timer; /* * TSC's on different sockets may be reset asynchronously. @@ -77,6 +78,46 @@ void tsc_verify_tsc_adjust(bool resume) } } +/* + * Normally the tsc_sync will be checked every time system enters idle + * state, but there is still caveat that a system won't enter idle, + * either because it's too busy or configured purposely to not enter + * idle. + * + * So setup a periodic timer (every 10 minutes) to make sure the check + * is always on. + */ + +#define SYNC_CHECK_INTERVAL (HZ * 600) + +static void tsc_sync_check_timer_fn(struct timer_list *unused) +{ + int next_cpu; + + tsc_verify_tsc_adjust(false); + + /* Run the check for all onlined CPUs in turn */ + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + + tsc_sync_check_timer.expires += SYNC_CHECK_INTERVAL; + add_timer_on(&tsc_sync_check_timer, next_cpu); +} + +static int __init start_sync_check_timer(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_TSC_ADJUST) || tsc_clocksource_reliable) + return 0; + + timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0); + tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL; + add_timer(&tsc_sync_check_timer); + + return 0; +} +late_initcall(start_sync_check_timer); + static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval, unsigned int cpu, bool bootcpu) { diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index cce1c89cb7df..c21bcd668284 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -160,7 +160,7 @@ Efault_end: user_access_end(); Efault: pr_alert("could not access userspace vm86 info\n"); - force_fatal_sig(SIGSEGV); + force_exit_sig(SIGSEGV); goto exit_vm86; } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e19dabf1848b..07e9215e911d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -125,7 +125,7 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu) } } -struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu) +static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu) { u32 base = vcpu->arch.kvm_cpuid_base; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 4a555f32885a..8d8c1cc7cb53 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1922,11 +1922,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; + if (all_cpus) + goto check_and_send_ipi; + if (!sparse_banks_len) goto ret_success; - if (!all_cpus && - kvm_read_guest(kvm, + if (kvm_read_guest(kvm, hc->ingpa + offsetof(struct hv_send_ipi_ex, vp_set.bank_contents), sparse_banks, @@ -1934,6 +1936,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool return HV_STATUS_INVALID_HYPERCALL_INPUT; } +check_and_send_ipi: if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return HV_STATUS_INVALID_HYPERCALL_INPUT; @@ -2022,7 +2025,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) { bool longmode; - longmode = is_64_bit_mode(vcpu); + longmode = is_64_bit_hypercall(vcpu); if (longmode) kvm_rax_write(vcpu, result); else { @@ -2171,7 +2174,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) } #ifdef CONFIG_X86_64 - if (is_64_bit_mode(vcpu)) { + if (is_64_bit_hypercall(vcpu)) { hc.param = kvm_rcx_read(vcpu); hc.ingpa = kvm_rdx_read(vcpu); hc.outgpa = kvm_r8_read(vcpu); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index e66e620c3bed..539333ac4b38 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -81,7 +81,6 @@ struct kvm_ioapic { unsigned long irq_states[IOAPIC_NUM_PINS]; struct kvm_io_device dev; struct kvm *kvm; - void (*ack_notifier)(void *opaque, int irq); spinlock_t lock; struct rtc_status rtc_status; struct delayed_work eoi_inject; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 650642b18d15..c2d7cfe82d00 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -56,7 +56,6 @@ struct kvm_pic { struct kvm_io_device dev_master; struct kvm_io_device dev_slave; struct kvm_io_device dev_elcr; - void (*ack_notifier)(void *opaque, int irq); unsigned long irq_states[PIC_NUM_PINS]; }; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 759952dd1222..f206fc35deff 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -707,7 +707,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) { int highest_irr; - if (apic->vcpu->arch.apicv_active) + if (kvm_x86_ops.sync_pir_to_irr) highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu); else highest_irr = apic_find_highest_irr(apic); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 33794379949e..e2e1d012df22 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1582,7 +1582,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp); if (is_tdp_mmu_enabled(kvm)) - flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush); + flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush); return flush; } @@ -1936,7 +1936,11 @@ static void mmu_audit_disable(void) { } static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) { - return sp->role.invalid || + if (sp->role.invalid) + return true; + + /* TDP MMU pages due not use the MMU generation. */ + return !sp->tdp_mmu_page && unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); } @@ -2173,10 +2177,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato iterator->shadow_addr = root; iterator->level = vcpu->arch.mmu->shadow_root_level; - if (iterator->level == PT64_ROOT_4LEVEL && + if (iterator->level >= PT64_ROOT_4LEVEL && vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL && !vcpu->arch.mmu->direct_map) - --iterator->level; + iterator->level = PT32E_ROOT_LEVEL; if (iterator->level == PT32E_ROOT_LEVEL) { /* @@ -3976,6 +3980,20 @@ out_retry: return true; } +/* + * Returns true if the page fault is stale and needs to be retried, i.e. if the + * root was invalidated by a memslot update or a relevant mmu_notifier fired. + */ +static bool is_page_fault_stale(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault, int mmu_seq) +{ + if (is_obsolete_sp(vcpu->kvm, to_shadow_page(vcpu->arch.mmu->root_hpa))) + return true; + + return fault->slot && + mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva); +} + static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu); @@ -4013,8 +4031,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault else write_lock(&vcpu->kvm->mmu_lock); - if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva)) + if (is_page_fault_stale(vcpu, fault, mmu_seq)) goto out_unlock; + r = make_mmu_pages_available(vcpu); if (r) goto out_unlock; @@ -4682,6 +4701,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu, /* PKEY and LA57 are active iff long mode is active. */ ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs); ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs); + ext.efer_lma = ____is_efer_lma(regs); } ext.valid = 1; @@ -4854,7 +4874,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, struct kvm_mmu *context = &vcpu->arch.guest_mmu; struct kvm_mmu_role_regs regs = { .cr0 = cr0, - .cr4 = cr4, + .cr4 = cr4 & ~X86_CR4_PKE, .efer = efer, }; union kvm_mmu_role new_role; @@ -4918,7 +4938,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, context->direct_map = false; update_permission_bitmask(context, true); - update_pkru_bitmask(context); + context->pkru_mask = 0; reset_rsvds_bits_mask_ept(vcpu, context, execonly); reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); } @@ -5024,6 +5044,14 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) /* * Invalidate all MMU roles to force them to reinitialize as CPUID * information is factored into reserved bit calculations. + * + * Correctly handling multiple vCPU models with respect to paging and + * physical address properties) in a single VM would require tracking + * all relevant CPUID information in kvm_mmu_page_role. That is very + * undesirable as it would increase the memory requirements for + * gfn_track (see struct kvm_mmu_page_role comments). For now that + * problem is swept under the rug; KVM's CPUID API is horrific and + * it's all but impossible to solve it without introducing a new API. */ vcpu->arch.root_mmu.mmu_role.ext.valid = 0; vcpu->arch.guest_mmu.mmu_role.ext.valid = 0; @@ -5031,24 +5059,10 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); /* - * KVM does not correctly handle changing guest CPUID after KVM_RUN, as - * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't - * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page - * faults due to reusing SPs/SPTEs. Alert userspace, but otherwise - * sweep the problem under the rug. - * - * KVM's horrific CPUID ABI makes the problem all but impossible to - * solve, as correctly handling multiple vCPU models (with respect to - * paging and physical address properties) in a single VM would require - * tracking all relevant CPUID information in kvm_mmu_page_role. That - * is very undesirable as it would double the memory requirements for - * gfn_track (see struct kvm_mmu_page_role comments), and in practice - * no sane VMM mucks with the core vCPU model on the fly. + * Changing guest CPUID after KVM_RUN is forbidden, see the comment in + * kvm_arch_vcpu_ioctl(). */ - if (vcpu->arch.last_vmentry_cpu != -1) { - pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} after KVM_RUN may cause guest instability\n"); - pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} will fail after KVM_RUN starting with Linux 5.16\n"); - } + KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm); } void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) @@ -5368,7 +5382,7 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { - kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE); + kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE); ++vcpu->stat.invlpg; } EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); @@ -5853,8 +5867,6 @@ restart: void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *slot) { - bool flush = false; - if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); /* @@ -5862,17 +5874,14 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, * logging at a 4k granularity and never creates collapsible * 2m SPTEs during dirty logging. */ - flush = slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true); - if (flush) + if (slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true)) kvm_arch_flush_remote_tlbs_memslot(kvm, slot); write_unlock(&kvm->mmu_lock); } if (is_tdp_mmu_enabled(kvm)) { read_lock(&kvm->mmu_lock); - flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, flush); - if (flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, slot); + kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot); read_unlock(&kvm->mmu_lock); } } @@ -6181,23 +6190,46 @@ void kvm_mmu_module_exit(void) mmu_audit_disable(); } +/* + * Calculate the effective recovery period, accounting for '0' meaning "let KVM + * select a halving time of 1 hour". Returns true if recovery is enabled. + */ +static bool calc_nx_huge_pages_recovery_period(uint *period) +{ + /* + * Use READ_ONCE to get the params, this may be called outside of the + * param setters, e.g. by the kthread to compute its next timeout. + */ + bool enabled = READ_ONCE(nx_huge_pages); + uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio); + + if (!enabled || !ratio) + return false; + + *period = READ_ONCE(nx_huge_pages_recovery_period_ms); + if (!*period) { + /* Make sure the period is not less than one second. */ + ratio = min(ratio, 3600u); + *period = 60 * 60 * 1000 / ratio; + } + return true; +} + static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp) { bool was_recovery_enabled, is_recovery_enabled; uint old_period, new_period; int err; - was_recovery_enabled = nx_huge_pages_recovery_ratio; - old_period = nx_huge_pages_recovery_period_ms; + was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period); err = param_set_uint(val, kp); if (err) return err; - is_recovery_enabled = nx_huge_pages_recovery_ratio; - new_period = nx_huge_pages_recovery_period_ms; + is_recovery_enabled = calc_nx_huge_pages_recovery_period(&new_period); - if (READ_ONCE(nx_huge_pages) && is_recovery_enabled && + if (is_recovery_enabled && (!was_recovery_enabled || old_period > new_period)) { struct kvm *kvm; @@ -6261,18 +6293,13 @@ static void kvm_recover_nx_lpages(struct kvm *kvm) static long get_nx_lpage_recovery_timeout(u64 start_time) { - uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio); - uint period = READ_ONCE(nx_huge_pages_recovery_period_ms); + bool enabled; + uint period; - if (!period && ratio) { - /* Make sure the period is not less than one second. */ - ratio = min(ratio, 3600u); - period = 60 * 60 * 1000 / ratio; - } + enabled = calc_nx_huge_pages_recovery_period(&period); - return READ_ONCE(nx_huge_pages) && ratio - ? start_time + msecs_to_jiffies(period) - get_jiffies_64() - : MAX_SCHEDULE_TIMEOUT; + return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64() + : MAX_SCHEDULE_TIMEOUT; } static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index f87d36898c44..708a5d297fe1 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -911,7 +911,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault r = RET_PF_RETRY; write_lock(&vcpu->kvm->mmu_lock); - if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva)) + + if (is_page_fault_stale(vcpu, fault, mmu_seq)) goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index a54c3491af42..1db8496259ad 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -317,9 +317,6 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt)); int level = sp->role.level; gfn_t base_gfn = sp->gfn; - u64 old_child_spte; - u64 *sptep; - gfn_t gfn; int i; trace_kvm_mmu_prepare_zap_page(sp); @@ -327,8 +324,9 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, tdp_mmu_unlink_page(kvm, sp, shared); for (i = 0; i < PT64_ENT_PER_PAGE; i++) { - sptep = rcu_dereference(pt) + i; - gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level); + u64 *sptep = rcu_dereference(pt) + i; + gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level); + u64 old_child_spte; if (shared) { /* @@ -374,7 +372,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, shared); } - kvm_flush_remote_tlbs_with_address(kvm, gfn, + kvm_flush_remote_tlbs_with_address(kvm, base_gfn, KVM_PAGES_PER_HPAGE(level + 1)); call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback); @@ -1033,9 +1031,9 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, { struct kvm_mmu_page *root; - for_each_tdp_mmu_root(kvm, root, range->slot->as_id) - flush |= zap_gfn_range(kvm, root, range->start, range->end, - range->may_block, flush, false); + for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false) + flush = zap_gfn_range(kvm, root, range->start, range->end, + range->may_block, flush, false); return flush; } @@ -1364,10 +1362,9 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm, * Clear leaf entries which could be replaced by large mappings, for * GFNs within the slot. */ -static bool zap_collapsible_spte_range(struct kvm *kvm, +static void zap_collapsible_spte_range(struct kvm *kvm, struct kvm_mmu_page *root, - const struct kvm_memory_slot *slot, - bool flush) + const struct kvm_memory_slot *slot) { gfn_t start = slot->base_gfn; gfn_t end = start + slot->npages; @@ -1378,10 +1375,8 @@ static bool zap_collapsible_spte_range(struct kvm *kvm, tdp_root_for_each_pte(iter, root, start, end) { retry: - if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, true)) { - flush = false; + if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true)) continue; - } if (!is_shadow_present_pte(iter.old_spte) || !is_last_spte(iter.old_spte, iter.level)) @@ -1393,6 +1388,7 @@ retry: pfn, PG_LEVEL_NUM)) continue; + /* Note, a successful atomic zap also does a remote TLB flush. */ if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) { /* * The iter must explicitly re-read the SPTE because @@ -1401,30 +1397,24 @@ retry: iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep)); goto retry; } - flush = true; } rcu_read_unlock(); - - return flush; } /* * Clear non-leaf entries (and free associated page tables) which could * be replaced by large mappings, for GFNs within the slot. */ -bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, - const struct kvm_memory_slot *slot, - bool flush) +void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, + const struct kvm_memory_slot *slot) { struct kvm_mmu_page *root; lockdep_assert_held_read(&kvm->mmu_lock); for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) - flush = zap_collapsible_spte_range(kvm, root, slot, flush); - - return flush; + zap_collapsible_spte_range(kvm, root, slot); } /* diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 476b133544dd..3899004a5d91 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -64,9 +64,8 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, unsigned long mask, bool wrprot); -bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, - const struct kvm_memory_slot *slot, - bool flush); +void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, + const struct kvm_memory_slot *slot); bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index affc0ea98d30..8f9af7b7dbbe 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -900,6 +900,7 @@ out: bool svm_check_apicv_inhibit_reasons(ulong bit) { ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | + BIT(APICV_INHIBIT_REASON_ABSENT) | BIT(APICV_INHIBIT_REASON_HYPERV) | BIT(APICV_INHIBIT_REASON_NESTED) | BIT(APICV_INHIBIT_REASON_IRQWIN) | @@ -989,16 +990,18 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) { struct vcpu_svm *svm = to_svm(vcpu); + int cpu = get_cpu(); + WARN_ON(cpu != vcpu->cpu); svm->avic_is_running = is_run; - if (!kvm_vcpu_apicv_active(vcpu)) - return; - - if (is_run) - avic_vcpu_load(vcpu, vcpu->cpu); - else - avic_vcpu_put(vcpu); + if (kvm_vcpu_apicv_active(vcpu)) { + if (is_run) + avic_vcpu_load(vcpu, cpu); + else + avic_vcpu_put(vcpu); + } + put_cpu(); } void svm_vcpu_blocking(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 871c426ec389..b4095dfeeee6 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -281,7 +281,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; - pmu->reserved_bits = 0xffffffff00200000ull; + pmu->reserved_bits = 0xfffffff000280000ull; pmu->version = 1; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 902c52a8dd0c..7656a2c5662a 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -237,7 +237,6 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; - bool es_active = argp->id == KVM_SEV_ES_INIT; int asid, ret; if (kvm->created_vcpus) @@ -247,7 +246,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) if (unlikely(sev->active)) return ret; - sev->es_active = es_active; + sev->active = true; + sev->es_active = argp->id == KVM_SEV_ES_INIT; asid = sev_asid_new(sev); if (asid < 0) goto e_no_asid; @@ -257,8 +257,6 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) if (ret) goto e_free; - sev->active = true; - sev->asid = asid; INIT_LIST_HEAD(&sev->regions_list); return 0; @@ -268,6 +266,7 @@ e_free: sev->asid = 0; e_no_asid: sev->es_active = false; + sev->active = false; return ret; } @@ -1530,7 +1529,7 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error); } -static bool cmd_allowed_from_miror(u32 cmd_id) +static bool is_cmd_allowed_from_mirror(u32 cmd_id) { /* * Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES @@ -1544,28 +1543,50 @@ static bool cmd_allowed_from_miror(u32 cmd_id) return false; } -static int sev_lock_for_migration(struct kvm *kvm) +static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) { - struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info; + struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info; + int r = -EBUSY; + + if (dst_kvm == src_kvm) + return -EINVAL; /* - * Bail if this VM is already involved in a migration to avoid deadlock - * between two VMs trying to migrate to/from each other. + * Bail if these VMs are already involved in a migration to avoid + * deadlock between two VMs trying to migrate to/from each other. */ - if (atomic_cmpxchg_acquire(&sev->migration_in_progress, 0, 1)) + if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1)) return -EBUSY; - mutex_lock(&kvm->lock); + if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1)) + goto release_dst; + r = -EINTR; + if (mutex_lock_killable(&dst_kvm->lock)) + goto release_src; + if (mutex_lock_killable(&src_kvm->lock)) + goto unlock_dst; return 0; + +unlock_dst: + mutex_unlock(&dst_kvm->lock); +release_src: + atomic_set_release(&src_sev->migration_in_progress, 0); +release_dst: + atomic_set_release(&dst_sev->migration_in_progress, 0); + return r; } -static void sev_unlock_after_migration(struct kvm *kvm) +static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) { - struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info; + struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info; - mutex_unlock(&kvm->lock); - atomic_set_release(&sev->migration_in_progress, 0); + mutex_unlock(&dst_kvm->lock); + mutex_unlock(&src_kvm->lock); + atomic_set_release(&dst_sev->migration_in_progress, 0); + atomic_set_release(&src_sev->migration_in_progress, 0); } @@ -1608,14 +1629,15 @@ static void sev_migrate_from(struct kvm_sev_info *dst, dst->asid = src->asid; dst->handle = src->handle; dst->pages_locked = src->pages_locked; + dst->enc_context_owner = src->enc_context_owner; src->asid = 0; src->active = false; src->handle = 0; src->pages_locked = 0; + src->enc_context_owner = NULL; - INIT_LIST_HEAD(&dst->regions_list); - list_replace_init(&src->regions_list, &dst->regions_list); + list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list); } static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) @@ -1667,15 +1689,6 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd) bool charged = false; int ret; - ret = sev_lock_for_migration(kvm); - if (ret) - return ret; - - if (sev_guest(kvm)) { - ret = -EINVAL; - goto out_unlock; - } - source_kvm_file = fget(source_fd); if (!file_is_kvm(source_kvm_file)) { ret = -EBADF; @@ -1683,16 +1696,26 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd) } source_kvm = source_kvm_file->private_data; - ret = sev_lock_for_migration(source_kvm); + ret = sev_lock_two_vms(kvm, source_kvm); if (ret) goto out_fput; - if (!sev_guest(source_kvm)) { + if (sev_guest(kvm) || !sev_guest(source_kvm)) { ret = -EINVAL; - goto out_source; + goto out_unlock; } src_sev = &to_kvm_svm(source_kvm)->sev_info; + + /* + * VMs mirroring src's encryption context rely on it to keep the + * ASID allocated, but below we are clearing src_sev->asid. + */ + if (src_sev->num_mirrored_vms) { + ret = -EBUSY; + goto out_unlock; + } + dst_sev->misc_cg = get_current_misc_cg(); cg_cleanup_sev = dst_sev; if (dst_sev->misc_cg != src_sev->misc_cg) { @@ -1729,13 +1752,11 @@ out_dst_cgroup: sev_misc_cg_uncharge(cg_cleanup_sev); put_misc_cg(cg_cleanup_sev->misc_cg); cg_cleanup_sev->misc_cg = NULL; -out_source: - sev_unlock_after_migration(source_kvm); +out_unlock: + sev_unlock_two_vms(kvm, source_kvm); out_fput: if (source_kvm_file) fput(source_kvm_file); -out_unlock: - sev_unlock_after_migration(kvm); return ret; } @@ -1757,7 +1778,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp) /* Only the enc_context_owner handles some memory enc operations. */ if (is_mirroring_enc_context(kvm) && - !cmd_allowed_from_miror(sev_cmd.id)) { + !is_cmd_allowed_from_mirror(sev_cmd.id)) { r = -EINVAL; goto out; } @@ -1954,71 +1975,60 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd) { struct file *source_kvm_file; struct kvm *source_kvm; - struct kvm_sev_info source_sev, *mirror_sev; + struct kvm_sev_info *source_sev, *mirror_sev; int ret; source_kvm_file = fget(source_fd); if (!file_is_kvm(source_kvm_file)) { ret = -EBADF; - goto e_source_put; + goto e_source_fput; } source_kvm = source_kvm_file->private_data; - mutex_lock(&source_kvm->lock); - - if (!sev_guest(source_kvm)) { - ret = -EINVAL; - goto e_source_unlock; - } + ret = sev_lock_two_vms(kvm, source_kvm); + if (ret) + goto e_source_fput; - /* Mirrors of mirrors should work, but let's not get silly */ - if (is_mirroring_enc_context(source_kvm) || source_kvm == kvm) { + /* + * Mirrors of mirrors should work, but let's not get silly. Also + * disallow out-of-band SEV/SEV-ES init if the target is already an + * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being + * created after SEV/SEV-ES initialization, e.g. to init intercepts. + */ + if (sev_guest(kvm) || !sev_guest(source_kvm) || + is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) { ret = -EINVAL; - goto e_source_unlock; + goto e_unlock; } - memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info, - sizeof(source_sev)); - /* * The mirror kvm holds an enc_context_owner ref so its asid can't * disappear until we're done with it */ + source_sev = &to_kvm_svm(source_kvm)->sev_info; kvm_get_kvm(source_kvm); - - fput(source_kvm_file); - mutex_unlock(&source_kvm->lock); - mutex_lock(&kvm->lock); - - if (sev_guest(kvm)) { - ret = -EINVAL; - goto e_mirror_unlock; - } + source_sev->num_mirrored_vms++; /* Set enc_context_owner and copy its encryption context over */ mirror_sev = &to_kvm_svm(kvm)->sev_info; mirror_sev->enc_context_owner = source_kvm; mirror_sev->active = true; - mirror_sev->asid = source_sev.asid; - mirror_sev->fd = source_sev.fd; - mirror_sev->es_active = source_sev.es_active; - mirror_sev->handle = source_sev.handle; + mirror_sev->asid = source_sev->asid; + mirror_sev->fd = source_sev->fd; + mirror_sev->es_active = source_sev->es_active; + mirror_sev->handle = source_sev->handle; + INIT_LIST_HEAD(&mirror_sev->regions_list); + ret = 0; + /* * Do not copy ap_jump_table. Since the mirror does not share the same * KVM contexts as the original, and they may have different * memory-views. */ - mutex_unlock(&kvm->lock); - return 0; - -e_mirror_unlock: - mutex_unlock(&kvm->lock); - kvm_put_kvm(source_kvm); - return ret; -e_source_unlock: - mutex_unlock(&source_kvm->lock); -e_source_put: +e_unlock: + sev_unlock_two_vms(kvm, source_kvm); +e_source_fput: if (source_kvm_file) fput(source_kvm_file); return ret; @@ -2030,17 +2040,24 @@ void sev_vm_destroy(struct kvm *kvm) struct list_head *head = &sev->regions_list; struct list_head *pos, *q; + WARN_ON(sev->num_mirrored_vms); + if (!sev_guest(kvm)) return; /* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */ if (is_mirroring_enc_context(kvm)) { - kvm_put_kvm(sev->enc_context_owner); + struct kvm *owner_kvm = sev->enc_context_owner; + struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info; + + mutex_lock(&owner_kvm->lock); + if (!WARN_ON(!owner_sev->num_mirrored_vms)) + owner_sev->num_mirrored_vms--; + mutex_unlock(&owner_kvm->lock); + kvm_put_kvm(owner_kvm); return; } - mutex_lock(&kvm->lock); - /* * Ensure that all guest tagged cache entries are flushed before * releasing the pages back to the system for use. CLFLUSH will @@ -2060,8 +2077,6 @@ void sev_vm_destroy(struct kvm *kvm) } } - mutex_unlock(&kvm->lock); - sev_unbind_asid(kvm, sev->handle); sev_asid_free(sev); } @@ -2245,7 +2260,7 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) __free_page(virt_to_page(svm->sev_es.vmsa)); if (svm->sev_es.ghcb_sa_free) - kfree(svm->sev_es.ghcb_sa); + kvfree(svm->sev_es.ghcb_sa); } static void dump_ghcb(struct vcpu_svm *svm) @@ -2337,24 +2352,29 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } -static int sev_es_validate_vmgexit(struct vcpu_svm *svm) +static bool sev_es_validate_vmgexit(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu; struct ghcb *ghcb; - u64 exit_code = 0; + u64 exit_code; + u64 reason; ghcb = svm->sev_es.ghcb; - /* Only GHCB Usage code 0 is supported */ - if (ghcb->ghcb_usage) - goto vmgexit_err; - /* - * Retrieve the exit code now even though is may not be marked valid + * Retrieve the exit code now even though it may not be marked valid * as it could help with debugging. */ exit_code = ghcb_get_sw_exit_code(ghcb); + /* Only GHCB Usage code 0 is supported */ + if (ghcb->ghcb_usage) { + reason = GHCB_ERR_INVALID_USAGE; + goto vmgexit_err; + } + + reason = GHCB_ERR_MISSING_INPUT; + if (!ghcb_sw_exit_code_is_valid(ghcb) || !ghcb_sw_exit_info_1_is_valid(ghcb) || !ghcb_sw_exit_info_2_is_valid(ghcb)) @@ -2433,30 +2453,34 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) case SVM_VMGEXIT_UNSUPPORTED_EVENT: break; default: + reason = GHCB_ERR_INVALID_EVENT; goto vmgexit_err; } - return 0; + return true; vmgexit_err: vcpu = &svm->vcpu; - if (ghcb->ghcb_usage) { + if (reason == GHCB_ERR_INVALID_USAGE) { vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n", ghcb->ghcb_usage); + } else if (reason == GHCB_ERR_INVALID_EVENT) { + vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n", + exit_code); } else { - vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n", + vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n", exit_code); dump_ghcb(svm); } - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 2; - vcpu->run->internal.data[0] = exit_code; - vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; + /* Clear the valid entries fields */ + memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); + + ghcb_set_sw_exit_info_1(ghcb, 2); + ghcb_set_sw_exit_info_2(ghcb, reason); - return -EINVAL; + return false; } void sev_es_unmap_ghcb(struct vcpu_svm *svm) @@ -2478,7 +2502,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) svm->sev_es.ghcb_sa_sync = false; } - kfree(svm->sev_es.ghcb_sa); + kvfree(svm->sev_es.ghcb_sa); svm->sev_es.ghcb_sa = NULL; svm->sev_es.ghcb_sa_free = false; } @@ -2526,14 +2550,14 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) scratch_gpa_beg = ghcb_get_sw_scratch(ghcb); if (!scratch_gpa_beg) { pr_err("vmgexit: scratch gpa not provided\n"); - return false; + goto e_scratch; } scratch_gpa_end = scratch_gpa_beg + len; if (scratch_gpa_end < scratch_gpa_beg) { pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n", len, scratch_gpa_beg); - return false; + goto e_scratch; } if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { @@ -2551,7 +2575,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) scratch_gpa_end > ghcb_scratch_end) { pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n", scratch_gpa_beg, scratch_gpa_end); - return false; + goto e_scratch; } scratch_va = (void *)svm->sev_es.ghcb; @@ -2564,18 +2588,18 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) if (len > GHCB_SCRATCH_AREA_LIMIT) { pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n", len, GHCB_SCRATCH_AREA_LIMIT); - return false; + goto e_scratch; } - scratch_va = kzalloc(len, GFP_KERNEL_ACCOUNT); + scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT); if (!scratch_va) - return false; + goto e_scratch; if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { /* Unable to copy scratch area from guest */ pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); - kfree(scratch_va); - return false; + kvfree(scratch_va); + goto e_scratch; } /* @@ -2592,6 +2616,12 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) svm->sev_es.ghcb_sa_len = len; return true; + +e_scratch: + ghcb_set_sw_exit_info_1(ghcb, 2); + ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA); + + return false; } static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, @@ -2642,7 +2672,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID); if (!ret) { - ret = -EINVAL; + /* Error, keep GHCB MSR value as-is */ break; } @@ -2678,10 +2708,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) GHCB_MSR_TERM_REASON_POS); pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", reason_set, reason_code); - fallthrough; + + ret = -EINVAL; + break; } default: - ret = -EINVAL; + /* Error, keep GHCB MSR value as-is */ + break; } trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id, @@ -2705,14 +2738,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) if (!ghcb_gpa) { vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n"); - return -EINVAL; + + /* Without a GHCB, just return right back to the guest */ + return 1; } if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) { /* Unable to map GHCB from guest */ vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n", ghcb_gpa); - return -EINVAL; + + /* Without a GHCB, just return right back to the guest */ + return 1; } svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva; @@ -2722,15 +2759,14 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) exit_code = ghcb_get_sw_exit_code(ghcb); - ret = sev_es_validate_vmgexit(svm); - if (ret) - return ret; + if (!sev_es_validate_vmgexit(svm)) + return 1; sev_es_sync_from_ghcb(svm); ghcb_set_sw_exit_info_1(ghcb, 0); ghcb_set_sw_exit_info_2(ghcb, 0); - ret = -EINVAL; + ret = 1; switch (exit_code) { case SVM_VMGEXIT_MMIO_READ: if (!setup_vmgexit_scratch(svm, true, control->exit_info_2)) @@ -2771,20 +2807,17 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) default: pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n", control->exit_info_1); - ghcb_set_sw_exit_info_1(ghcb, 1); - ghcb_set_sw_exit_info_2(ghcb, - X86_TRAP_UD | - SVM_EVTINJ_TYPE_EXEPT | - SVM_EVTINJ_VALID); + ghcb_set_sw_exit_info_1(ghcb, 2); + ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT); } - ret = 1; break; } case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", control->exit_info_1, control->exit_info_2); + ret = -EINVAL; break; default: ret = svm_invoke_exit_handler(vcpu, exit_code); @@ -2806,7 +2839,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) return -EINVAL; if (!setup_vmgexit_scratch(svm, in, bytes)) - return -EINVAL; + return 1; return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa, count, in); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5630c241d5f6..d0f68d11ec70 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4651,7 +4651,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_irr_update = svm_hwapic_irr_update, .hwapic_isr_update = svm_hwapic_isr_update, - .sync_pir_to_irr = kvm_lapic_find_highest_irr, .apicv_post_state_restore = avic_post_state_restore, .set_tss_addr = svm_set_tss_addr, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 437e68504e66..1c7306c370fa 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -79,6 +79,7 @@ struct kvm_sev_info { struct list_head regions_list; /* List of registered regions */ u64 ap_jump_table; /* SEV-ES AP Jump Table address */ struct kvm *enc_context_owner; /* Owner of copied encryption context */ + unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */ struct misc_cg *misc_cg; /* For misc cgroup accounting */ atomic_t migration_in_progress; }; @@ -247,7 +248,7 @@ static __always_inline bool sev_es_guest(struct kvm *kvm) #ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; - return sev_guest(kvm) && sev->es_active; + return sev->es_active && !WARN_ON_ONCE(!sev->active); #else return false; #endif diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index b213ca966d41..9c941535f78c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -670,33 +670,39 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - struct kvm_host_map map; - struct vmcs12 *shadow; + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache; if (!nested_cpu_has_shadow_vmcs(vmcs12) || vmcs12->vmcs_link_pointer == INVALID_GPA) return; - shadow = get_shadow_vmcs12(vcpu); - - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)) + if (ghc->gpa != vmcs12->vmcs_link_pointer && + kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, + vmcs12->vmcs_link_pointer, VMCS12_SIZE)) return; - memcpy(shadow, map.hva, VMCS12_SIZE); - kvm_vcpu_unmap(vcpu, &map, false); + kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu), + VMCS12_SIZE); } static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); + struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache; if (!nested_cpu_has_shadow_vmcs(vmcs12) || vmcs12->vmcs_link_pointer == INVALID_GPA) return; - kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer, - get_shadow_vmcs12(vcpu), VMCS12_SIZE); + if (ghc->gpa != vmcs12->vmcs_link_pointer && + kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, + vmcs12->vmcs_link_pointer, VMCS12_SIZE)) + return; + + kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu), + VMCS12_SIZE); } /* @@ -1156,29 +1162,26 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu, WARN_ON(!enable_vpid); /* - * If VPID is enabled and used by vmc12, but L2 does not have a unique - * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate - * a VPID for L2, flush the current context as the effective ASID is - * common to both L1 and L2. - * - * Defer the flush so that it runs after vmcs02.EPTP has been set by - * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid - * redundant flushes further down the nested pipeline. - * - * If a TLB flush isn't required due to any of the above, and vpid12 is - * changing then the new "virtual" VPID (vpid12) will reuse the same - * "real" VPID (vpid02), and so needs to be flushed. There's no direct - * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for - * all nested vCPUs. Remember, a flush on VM-Enter does not invalidate - * guest-physical mappings, so there is no need to sync the nEPT MMU. + * VPID is enabled and in use by vmcs12. If vpid12 is changing, then + * emulate a guest TLB flush as KVM does not track vpid12 history nor + * is the VPID incorporated into the MMU context. I.e. KVM must assume + * that the new vpid12 has never been used and thus represents a new + * guest ASID that cannot have entries in the TLB. */ - if (!nested_has_guest_tlb_tag(vcpu)) { - kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); - } else if (is_vmenter && - vmcs12->virtual_processor_id != vmx->nested.last_vpid) { + if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) { vmx->nested.last_vpid = vmcs12->virtual_processor_id; - vpid_sync_context(nested_get_vpid02(vcpu)); + kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); + return; } + + /* + * If VPID is enabled, used by vmc12, and vpid12 is not changing but + * does not have a unique TLB tag (ASID), i.e. EPT is disabled and + * KVM was unable to allocate a VPID for L2, flush the current context + * as the effective ASID is common to both L1 and L2. + */ + if (!nested_has_guest_tlb_tag(vcpu)) + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) @@ -2588,8 +2591,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, - vmcs12->guest_ia32_perf_global_ctrl))) + vmcs12->guest_ia32_perf_global_ctrl))) { + *entry_failure_code = ENTRY_FAIL_DEFAULT; return -EINVAL; + } kvm_rsp_write(vcpu, vmcs12->guest_rsp); kvm_rip_write(vcpu, vmcs12->guest_rip); @@ -2830,6 +2835,17 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu, return 0; } +static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ +#ifdef CONFIG_X86_64 + if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) != + !!(vcpu->arch.efer & EFER_LMA))) + return -EINVAL; +#endif + return 0; +} + static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -2854,18 +2870,16 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, return -EINVAL; #ifdef CONFIG_X86_64 - ia32e = !!(vcpu->arch.efer & EFER_LMA); + ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE); #else ia32e = false; #endif if (ia32e) { - if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) || - CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) + if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) return -EINVAL; } else { - if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) || - CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || + if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || CC(vmcs12->host_cr4 & X86_CR4_PCIDE) || CC((vmcs12->host_rip) >> 32)) return -EINVAL; @@ -2910,9 +2924,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - int r = 0; - struct vmcs12 *shadow; - struct kvm_host_map map; + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache; + struct vmcs_hdr hdr; if (vmcs12->vmcs_link_pointer == INVALID_GPA) return 0; @@ -2920,17 +2934,21 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))) return -EINVAL; - if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))) - return -EINVAL; + if (ghc->gpa != vmcs12->vmcs_link_pointer && + CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, + vmcs12->vmcs_link_pointer, VMCS12_SIZE))) + return -EINVAL; - shadow = map.hva; + if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr, + offsetof(struct vmcs12, hdr), + sizeof(hdr)))) + return -EINVAL; - if (CC(shadow->hdr.revision_id != VMCS12_REVISION) || - CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))) - r = -EINVAL; + if (CC(hdr.revision_id != VMCS12_REVISION) || + CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))) + return -EINVAL; - kvm_vcpu_unmap(vcpu, &map, false); - return r; + return 0; } /* @@ -3325,8 +3343,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, }; u32 failed_index; - if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) - kvm_vcpu_flush_tlb_current(vcpu); + kvm_service_local_tlb_flush_requests(vcpu); evaluate_pending_interrupts = exec_controls_get(vmx) & (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); @@ -3535,6 +3552,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (nested_vmx_check_controls(vcpu, vmcs12)) return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + if (nested_vmx_check_address_space_size(vcpu, vmcs12)) + return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); + if (nested_vmx_check_host_state(vcpu, vmcs12)) return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); @@ -4480,9 +4500,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, (void)nested_get_evmcs_page(vcpu); } - /* Service the TLB flush request for L2 before switching to L1. */ - if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) - kvm_vcpu_flush_tlb_current(vcpu); + /* Service pending TLB flush requests for L2 before switching to L1. */ + kvm_service_local_tlb_flush_requests(vcpu); /* * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between @@ -4835,6 +4854,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) if (!vmx->nested.cached_vmcs12) goto out_cached_vmcs12; + vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA; vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); if (!vmx->nested.cached_shadow_vmcs12) goto out_cached_shadow_vmcs12; @@ -5264,10 +5284,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) return 1; if (vmx->nested.current_vmptr != vmptr) { - struct kvm_host_map map; - struct vmcs12 *new_vmcs12; + struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache; + struct vmcs_hdr hdr; - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) { + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) { /* * Reads from an unbacked page return all 1s, * which means that the 32 bits located at the @@ -5278,12 +5298,16 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); } - new_vmcs12 = map.hva; + if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr, + offsetof(struct vmcs12, hdr), + sizeof(hdr))) { + return nested_vmx_fail(vcpu, + VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); + } - if (new_vmcs12->hdr.revision_id != VMCS12_REVISION || - (new_vmcs12->hdr.shadow_vmcs && + if (hdr.revision_id != VMCS12_REVISION || + (hdr.shadow_vmcs && !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { - kvm_vcpu_unmap(vcpu, &map, false); return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); } @@ -5294,8 +5318,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) * Load VMCS12 from guest memory since it is not already * cached. */ - memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); - kvm_vcpu_unmap(vcpu, &map, false); + if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12, + VMCS12_SIZE)) { + return nested_vmx_fail(vcpu, + VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); + } set_current_vmptr(vmx, vmptr); } diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 5f81ef092bd4..1c94783b5a54 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -5,6 +5,7 @@ #include <asm/cpu.h> #include "lapic.h" +#include "irq.h" #include "posted_intr.h" #include "trace.h" #include "vmx.h" @@ -77,13 +78,18 @@ after_clear_sn: pi_set_on(pi_desc); } +static bool vmx_can_use_vtd_pi(struct kvm *kvm) +{ + return irqchip_in_kernel(kvm) && enable_apicv && + kvm_arch_has_assigned_device(kvm) && + irq_remapping_cap(IRQ_POSTING_CAP); +} + void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) + if (!vmx_can_use_vtd_pi(vcpu->kvm)) return; /* Set SN when the vCPU is preempted */ @@ -141,9 +147,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu) struct pi_desc old, new; struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) + if (!vmx_can_use_vtd_pi(vcpu->kvm)) return 0; WARN_ON(irqs_disabled()); @@ -270,9 +274,7 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, struct vcpu_data vcpu_info; int idx, ret = 0; - if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(kvm->vcpus[0])) + if (!vmx_can_use_vtd_pi(kvm)) return 0; idx = srcu_read_lock(&kvm->irq_srcu); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ba66c171d951..5aadad3e7367 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2646,15 +2646,6 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) if (!loaded_vmcs->msr_bitmap) goto out_vmcs; memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); - - if (IS_ENABLED(CONFIG_HYPERV) && - static_branch_unlikely(&enable_evmcs) && - (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { - struct hv_enlightened_vmcs *evmcs = - (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs; - - evmcs->hv_enlightenments_control.msr_bitmap = 1; - } } memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); @@ -2918,6 +2909,13 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) } } +static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) +{ + if (is_guest_mode(vcpu)) + return nested_get_vpid02(vcpu); + return to_vmx(vcpu)->vpid; +} + static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; @@ -2930,31 +2928,29 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) if (enable_ept) ept_sync_context(construct_eptp(vcpu, root_hpa, mmu->shadow_root_level)); - else if (!is_guest_mode(vcpu)) - vpid_sync_context(to_vmx(vcpu)->vpid); else - vpid_sync_context(nested_get_vpid02(vcpu)); + vpid_sync_context(vmx_get_current_vpid(vcpu)); } static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) { /* - * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in + * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in * vmx_flush_tlb_guest() for an explanation of why this is ok. */ - vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr); + vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr); } static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) { /* - * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0 - * or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit - * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is + * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a + * vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are + * required to flush GVA->{G,H}PA mappings from the TLB if vpid is * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed), * i.e. no explicit INVVPID is necessary. */ - vpid_sync_context(to_vmx(vcpu)->vpid); + vpid_sync_context(vmx_get_current_vpid(vcpu)); } void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu) @@ -6262,9 +6258,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); int max_irr; - bool max_irr_updated; + bool got_posted_interrupt; - if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm)) + if (KVM_BUG_ON(!enable_apicv, vcpu->kvm)) return -EIO; if (pi_test_on(&vmx->pi_desc)) { @@ -6274,22 +6270,33 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) * But on x86 this is just a compiler barrier anyway. */ smp_mb__after_atomic(); - max_irr_updated = + got_posted_interrupt = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); - - /* - * If we are running L2 and L1 has a new pending interrupt - * which can be injected, this may cause a vmexit or it may - * be injected into L2. Either way, this interrupt will be - * processed via KVM_REQ_EVENT, not RVI, because we do not use - * virtual interrupt delivery to inject L1 interrupts into L2. - */ - if (is_guest_mode(vcpu) && max_irr_updated) - kvm_make_request(KVM_REQ_EVENT, vcpu); } else { max_irr = kvm_lapic_find_highest_irr(vcpu); + got_posted_interrupt = false; } - vmx_hwapic_irr_update(vcpu, max_irr); + + /* + * Newly recognized interrupts are injected via either virtual interrupt + * delivery (RVI) or KVM_REQ_EVENT. Virtual interrupt delivery is + * disabled in two cases: + * + * 1) If L2 is running and the vCPU has a new pending interrupt. If L1 + * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a + * VM-Exit to L1. If L1 doesn't want to exit, the interrupt is injected + * into L2, but KVM doesn't use virtual interrupt delivery to inject + * interrupts into L2, and so KVM_REQ_EVENT is again needed. + * + * 2) If APICv is disabled for this vCPU, assigned devices may still + * attempt to post interrupts. The posted interrupt vector will cause + * a VM-Exit and the subsequent entry will call sync_pir_to_irr. + */ + if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu)) + vmx_set_rvi(max_irr); + else if (got_posted_interrupt) + kvm_make_request(KVM_REQ_EVENT, vcpu); + return max_irr; } @@ -6826,6 +6833,19 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) if (err < 0) goto free_pml; + /* + * Use Hyper-V 'Enlightened MSR Bitmap' feature when KVM runs as a + * nested (L1) hypervisor and Hyper-V in L0 supports it. Enable the + * feature only for vmcs01, KVM currently isn't equipped to realize any + * performance benefits from enabling it for vmcs02. + */ + if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs) && + (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { + struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs; + + evmcs->hv_enlightenments_control.msr_bitmap = 1; + } + /* The MSR bitmap starts with all ones */ bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS); bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS); @@ -7509,6 +7529,7 @@ static void hardware_unsetup(void) static bool vmx_check_apicv_inhibit_reasons(ulong bit) { ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | + BIT(APICV_INHIBIT_REASON_ABSENT) | BIT(APICV_INHIBIT_REASON_HYPERV) | BIT(APICV_INHIBIT_REASON_BLOCKIRQ); @@ -7761,10 +7782,10 @@ static __init int hardware_setup(void) ple_window_shrink = 0; } - if (!cpu_has_vmx_apicv()) { + if (!cpu_has_vmx_apicv()) enable_apicv = 0; + if (!enable_apicv) vmx_x86_ops.sync_pir_to_irr = NULL; - } if (cpu_has_vmx_tsc_scaling()) { kvm_has_tsc_control = true; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a4ead6023133..4df2ac24ffc1 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -142,6 +142,16 @@ struct nested_vmx { struct vmcs12 *cached_shadow_vmcs12; /* + * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer + */ + struct gfn_to_hva_cache shadow_vmcs12_cache; + + /* + * GPA to HVA cache for VMCS12 + */ + struct gfn_to_hva_cache vmcs12_cache; + + /* * Indicates if the shadow vmcs or enlightened vmcs must be updated * with the data held by struct vmcs12. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dc7eb5fddfd3..0cf1082455df 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -890,7 +890,8 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu))) return 1; - if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) + if (!(cr0 & X86_CR0_PG) && + (is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))) return 1; static_call(kvm_x86_set_cr0)(vcpu, cr0); @@ -3258,6 +3259,29 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) static_call(kvm_x86_tlb_flush_guest)(vcpu); } + +static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu) +{ + ++vcpu->stat.tlb_flush; + static_call(kvm_x86_tlb_flush_current)(vcpu); +} + +/* + * Service "local" TLB flush requests, which are specific to the current MMU + * context. In addition to the generic event handling in vcpu_enter_guest(), + * TLB flushes that are targeted at an MMU context also need to be serviced + * prior before nested VM-Enter/VM-Exit. + */ +void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu) +{ + if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) + kvm_vcpu_flush_tlb_current(vcpu); + + if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) + kvm_vcpu_flush_tlb_guest(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests); + static void record_steal_time(struct kvm_vcpu *vcpu) { struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; @@ -3307,9 +3331,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu) "xor %1, %1\n" "2:\n" _ASM_EXTABLE_UA(1b, 2b) - : "+r" (st_preempted), - "+&r" (err) - : "m" (st->preempted)); + : "+q" (st_preempted), + "+&r" (err), + "+m" (st->preempted)); if (err) goto out; @@ -4133,6 +4157,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SGX_ATTRIBUTE: #endif case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM: + case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM: case KVM_CAP_SREGS2: case KVM_CAP_EXIT_ON_EMULATION_FAILURE: case KVM_CAP_VCPU_ATTRIBUTES: @@ -4179,7 +4204,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = !static_call(kvm_x86_cpu_has_accelerated_tpr)(); break; case KVM_CAP_NR_VCPUS: - r = num_online_cpus(); + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; @@ -4448,8 +4473,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { - if (vcpu->arch.apicv_active) - static_call(kvm_x86_sync_pir_to_irr)(vcpu); + static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); return kvm_apic_get_state(vcpu, s); } @@ -5124,6 +5148,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_cpuid __user *cpuid_arg = argp; struct kvm_cpuid cpuid; + /* + * KVM does not correctly handle changing guest CPUID after KVM_RUN, as + * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't + * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page + * faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with + * the core vCPU model on the fly, so fail. + */ + r = -EINVAL; + if (vcpu->arch.last_vmentry_cpu != -1) + goto out; + r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) goto out; @@ -5134,6 +5169,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_cpuid2 __user *cpuid_arg = argp; struct kvm_cpuid2 cpuid; + /* + * KVM_SET_CPUID{,2} after KVM_RUN is forbidded, see the comment in + * KVM_SET_CPUID case above. + */ + r = -EINVAL; + if (vcpu->arch.last_vmentry_cpu != -1) + goto out; + r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) goto out; @@ -5698,6 +5741,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, smp_wmb(); kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT; kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; + kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT); r = 0; split_irqchip_unlock: mutex_unlock(&kvm->lock); @@ -6078,6 +6122,7 @@ set_identity_unlock: /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ smp_wmb(); kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL; + kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT); create_irqchip_unlock: mutex_unlock(&kvm->lock); break; @@ -7077,7 +7122,13 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port, void *val, unsigned int count) { if (vcpu->arch.pio.count) { - /* Complete previous iteration. */ + /* + * Complete a previous iteration that required userspace I/O. + * Note, @count isn't guaranteed to match pio.count as userspace + * can modify ECX before rerunning the vCPU. Ignore any such + * shenanigans as KVM doesn't support modifying the rep count, + * and the emulator ensures @count doesn't overflow the buffer. + */ } else { int r = __emulator_pio_in(vcpu, size, port, count); if (!r) @@ -7086,7 +7137,6 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, /* Results already available, fall through. */ } - WARN_ON(count != vcpu->arch.pio.count); complete_emulator_pio_in(vcpu, val); return 1; } @@ -8776,10 +8826,9 @@ static void kvm_apicv_init(struct kvm *kvm) { init_rwsem(&kvm->arch.apicv_update_lock); - if (enable_apicv) - clear_bit(APICV_INHIBIT_REASON_DISABLE, - &kvm->arch.apicv_inhibit_reasons); - else + set_bit(APICV_INHIBIT_REASON_ABSENT, + &kvm->arch.apicv_inhibit_reasons); + if (!enable_apicv) set_bit(APICV_INHIBIT_REASON_DISABLE, &kvm->arch.apicv_inhibit_reasons); } @@ -8848,7 +8897,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hypercall(nr, a0, a1, a2, a3); - op_64_bit = is_64_bit_mode(vcpu); + op_64_bit = is_64_bit_hypercall(vcpu); if (!op_64_bit) { nr &= 0xFFFFFFFF; a0 &= 0xFFFFFFFF; @@ -9528,8 +9577,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) if (irqchip_split(vcpu->kvm)) kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); else { - if (vcpu->arch.apicv_active) - static_call(kvm_x86_sync_pir_to_irr)(vcpu); + static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); if (ioapic_in_kernel(vcpu->kvm)) kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } @@ -9547,12 +9595,16 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; - if (to_hv_vcpu(vcpu)) + if (to_hv_vcpu(vcpu)) { bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, to_hv_synic(vcpu)->vec_bitmap, 256); + static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap); + return; + } - static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap); + static_call(kvm_x86_load_eoi_exitmap)( + vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors); } void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, @@ -9644,10 +9696,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* Flushing all ASIDs flushes the current ASID... */ kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } - if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) - kvm_vcpu_flush_tlb_current(vcpu); - if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) - kvm_vcpu_flush_tlb_guest(vcpu); + kvm_service_local_tlb_flush_requests(vcpu); if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; @@ -9798,10 +9847,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* * This handles the case where a posted interrupt was - * notified with kvm_vcpu_kick. + * notified with kvm_vcpu_kick. Assigned devices can + * use the POSTED_INTR_VECTOR even if APICv is disabled, + * so do it even if APICv is disabled on this vCPU. */ - if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) - static_call(kvm_x86_sync_pir_to_irr)(vcpu); + if (kvm_lapic_enabled(vcpu)) + static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); if (kvm_vcpu_exit_request(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; @@ -9845,8 +9896,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST)) break; - if (vcpu->arch.apicv_active) - static_call(kvm_x86_sync_pir_to_irr)(vcpu); + if (kvm_lapic_enabled(vcpu)) + static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); if (unlikely(kvm_vcpu_exit_request(vcpu))) { exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index ea264c4502e4..4abcd8d9836d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -103,6 +103,7 @@ static inline unsigned int __shrink_ple_window(unsigned int val, #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL +void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); int kvm_check_nested_events(struct kvm_vcpu *vcpu); static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) @@ -153,12 +154,24 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu) { int cs_db, cs_l; + WARN_ON_ONCE(vcpu->arch.guest_state_protected); + if (!is_long_mode(vcpu)) return false; static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l); return cs_l; } +static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu) +{ + /* + * If running with protected guest state, the CS register is not + * accessible. The hypercall register values will have had to been + * provided in 64-bit mode, so assume the guest is in 64-bit. + */ + return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu); +} + static inline bool x86_exception_has_error_code(unsigned int vector) { static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) | @@ -173,12 +186,6 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; } -static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu) -{ - ++vcpu->stat.tlb_flush; - static_call(kvm_x86_tlb_flush_current)(vcpu); -} - static inline int is_pae(struct kvm_vcpu *vcpu) { return kvm_read_cr4_bits(vcpu, X86_CR4_PAE); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 8f62baebd028..dff2bdf9507a 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -127,9 +127,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) state_entry_time = vx->runstate_entry_time; state_entry_time |= XEN_RUNSTATE_UPDATE; - BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) != + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) != sizeof(state_entry_time)); - BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) != + BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != sizeof(state_entry_time)); if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, @@ -144,9 +144,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != offsetof(struct compat_vcpu_runstate_info, state)); - BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) != + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) != sizeof(vx->current_runstate)); - BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) != + BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) != sizeof(vx->current_runstate)); if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, @@ -163,9 +163,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) offsetof(struct vcpu_runstate_info, time) - sizeof(u64)); BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64)); - BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) != - sizeof(((struct compat_vcpu_runstate_info *)0)->time)); - BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) != + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != + sizeof_field(struct compat_vcpu_runstate_info, time)); + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof(vx->runstate_times)); if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, @@ -205,9 +205,9 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) != offsetof(struct compat_vcpu_info, evtchn_upcall_pending)); BUILD_BUG_ON(sizeof(rc) != - sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending)); + sizeof_field(struct vcpu_info, evtchn_upcall_pending)); BUILD_BUG_ON(sizeof(rc) != - sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending)); + sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending)); /* * For efficiency, this mirrors the checks for using the valid @@ -299,7 +299,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) break; case KVM_XEN_ATTR_TYPE_SHARED_INFO: - data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn); + data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn; r = 0; break; @@ -698,7 +698,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) kvm_hv_hypercall_enabled(vcpu)) return kvm_hv_hypercall(vcpu); - longmode = is_64_bit_mode(vcpu); + longmode = is_64_bit_hypercall(vcpu); if (!longmode) { params[0] = (u32)kvm_rbx_read(vcpu); params[1] = (u32)kvm_rcx_read(vcpu); diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index b15ebfe40a73..b0b848d6933a 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -277,7 +277,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) return; } - new = early_memremap(data.phys_map, data.size); + new = early_memremap_prot(data.phys_map, data.size, + pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL))); if (!new) { pr_err("Failed to map new boot services memmap\n"); return; diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 4a3da7592b99..38d24d2ab38b 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -72,6 +72,7 @@ static void __init setup_real_mode(void) #ifdef CONFIG_X86_64 u64 *trampoline_pgd; u64 efer; + int i; #endif base = (unsigned char *)real_mode_header; @@ -128,8 +129,17 @@ static void __init setup_real_mode(void) trampoline_header->flags = 0; trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); + + /* Map the real mode stub as virtual == physical */ trampoline_pgd[0] = trampoline_pgd_entry.pgd; - trampoline_pgd[511] = init_top_pgt[511].pgd; + + /* + * Include the entirety of the kernel mapping into the trampoline + * PGD. This way, all mappings present in the normal kernel page + * tables are usable while running on trampoline_pgd. + */ + for (i = pgd_index(__PAGE_OFFSET); i < PTRS_PER_PGD; i++) + trampoline_pgd[i] = init_top_pgt[i].pgd; #endif sme_sev_setup_real_mode(trampoline_header); diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 220dd9678494..444d824775f6 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <../entry/calling.h> .pushsection .noinstr.text, "ax" /* @@ -193,6 +194,25 @@ SYM_CODE_START(xen_iret) SYM_CODE_END(xen_iret) /* + * XEN pv doesn't use trampoline stack, PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is + * also the kernel stack. Reusing swapgs_restore_regs_and_return_to_usermode() + * in XEN pv would cause %rsp to move up to the top of the kernel stack and + * leave the IRET frame below %rsp, which is dangerous to be corrupted if #NMI + * interrupts. And swapgs_restore_regs_and_return_to_usermode() pushing the IRET + * frame at the same address is useless. + */ +SYM_CODE_START(xenpv_restore_regs_and_return_to_usermode) + UNWIND_HINT_REGS + POP_REGS + + /* stackleak_erase() can work safely on the kernel stack. */ + STACKLEAK_ERASE_NOCLOBBER + + addq $8, %rsp /* skip regs->orig_ax */ + jmp xen_iret +SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) + +/* * Xen handles syscall callbacks much like ordinary exceptions, which * means we have: * - kernel gs diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h index a8a041609c5d..7b4359312c25 100644 --- a/arch/xtensa/include/asm/cacheflush.h +++ b/arch/xtensa/include/asm/cacheflush.h @@ -121,7 +121,6 @@ void flush_cache_page(struct vm_area_struct*, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *); -void flush_dcache_folio(struct folio *); void local_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -138,9 +137,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, #define flush_cache_vunmap(start,end) do { } while (0) #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO #define flush_dcache_page(page) do { } while (0) -static inline void flush_dcache_folio(struct folio *folio) { } #define flush_icache_range local_flush_icache_range #define flush_cache_page(vma, addr, pfn) do { } while (0) diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 104b327f8ac9..3e3e1a506bed 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -419,3 +419,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv diff --git a/block/bdev.c b/block/bdev.c index b4dab2fb6a74..b1d087e5e205 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -753,8 +753,7 @@ struct block_device *blkdev_get_no_open(dev_t dev) if (!bdev) return NULL; - if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) || - !try_module_get(bdev->bd_disk->fops->owner)) { + if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN)) { put_device(&bdev->bd_device); return NULL; } @@ -764,7 +763,6 @@ struct block_device *blkdev_get_no_open(dev_t dev) void blkdev_put_no_open(struct block_device *bdev) { - module_put(bdev->bd_disk->fops->owner); put_device(&bdev->bd_device); } @@ -820,12 +818,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) ret = -ENXIO; if (!disk_live(disk)) goto abort_claiming; + if (!try_module_get(disk->fops->owner)) + goto abort_claiming; if (bdev_is_partition(bdev)) ret = blkdev_get_part(bdev, mode); else ret = blkdev_get_whole(bdev, mode); if (ret) - goto abort_claiming; + goto put_module; if (mode & FMODE_EXCL) { bd_finish_claiming(bdev, holder); @@ -847,7 +847,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) if (unblock_events) disk_unblock_events(disk); return bdev; - +put_module: + module_put(disk->fops->owner); abort_claiming: if (mode & FMODE_EXCL) bd_abort_claiming(bdev, holder); @@ -956,6 +957,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) blkdev_put_whole(bdev, mode); mutex_unlock(&disk->open_mutex); + module_put(disk->fops->owner); blkdev_put_no_open(bdev); } EXPORT_SYMBOL(blkdev_put); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 88b1fce90520..663aabfeba18 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -640,7 +640,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, */ ret = blk_queue_enter(q, 0); if (ret) - return ret; + goto fail; rcu_read_lock(); spin_lock_irq(&q->queue_lock); @@ -676,13 +676,13 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, new_blkg = blkg_alloc(pos, q, GFP_KERNEL); if (unlikely(!new_blkg)) { ret = -ENOMEM; - goto fail; + goto fail_exit_queue; } if (radix_tree_preload(GFP_KERNEL)) { blkg_free(new_blkg); ret = -ENOMEM; - goto fail; + goto fail_exit_queue; } rcu_read_lock(); @@ -722,9 +722,10 @@ fail_preloaded: fail_unlock: spin_unlock_irq(&q->queue_lock); rcu_read_unlock(); +fail_exit_queue: + blk_queue_exit(q); fail: blkdev_put_no_open(bdev); - blk_queue_exit(q); /* * If queue was bypassing, we should retry. Do so after a * short msleep(). It isn't strictly necessary but queue diff --git a/block/blk-core.c b/block/blk-core.c index 9ee32f85d74e..1378d084c770 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -363,8 +363,10 @@ void blk_cleanup_queue(struct request_queue *q) blk_queue_flag_set(QUEUE_FLAG_DEAD, q); blk_sync_queue(q); - if (queue_is_mq(q)) + if (queue_is_mq(q)) { + blk_mq_cancel_work_sync(q); blk_mq_exit_queue(q); + } /* * In theory, request pool of sched_tags belongs to request queue. @@ -1015,6 +1017,7 @@ EXPORT_SYMBOL(submit_bio); /** * bio_poll - poll for BIO completions * @bio: bio to poll for + * @iob: batches of IO * @flags: BLK_POLL_* flags that control the behavior * * Poll for completions on queue associated with the bio. Returns number of diff --git a/block/blk-flush.c b/block/blk-flush.c index 8e364bda5166..1fce6d16e6d3 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -379,7 +379,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) * @rq is being submitted. Analyze what needs to be done and put it on the * right queue. */ -bool blk_insert_flush(struct request *rq) +void blk_insert_flush(struct request *rq) { struct request_queue *q = rq->q; unsigned long fflags = q->queue_flags; /* may change, cache */ @@ -409,7 +409,7 @@ bool blk_insert_flush(struct request *rq) */ if (!policy) { blk_mq_end_request(rq, 0); - return true; + return; } BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */ @@ -420,8 +420,10 @@ bool blk_insert_flush(struct request *rq) * for normal execution. */ if ((policy & REQ_FSEQ_DATA) && - !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) - return false; + !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { + blk_mq_request_bypass_insert(rq, false, true); + return; + } /* * @rq should go through flush machinery. Mark it part of flush @@ -437,8 +439,6 @@ bool blk_insert_flush(struct request *rq) spin_lock_irq(&fq->mq_flush_lock); blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0); spin_unlock_irq(&fq->mq_flush_lock); - - return true; } /** diff --git a/block/blk-mq.c b/block/blk-mq.c index 3ab34c4f20da..8874a63ae952 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -860,13 +860,14 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob) if (iob->need_ts) __blk_mq_end_request_acct(rq, now); + rq_qos_done(rq->q, rq); + WRITE_ONCE(rq->state, MQ_RQ_IDLE); if (!refcount_dec_and_test(&rq->ref)) continue; blk_crypto_free_request(rq); blk_pm_mark_last_busy(rq); - rq_qos_done(rq->q, rq); if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) { if (cur_hctx) @@ -2543,8 +2544,7 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, return NULL; } -static inline bool blk_mq_can_use_cached_rq(struct request *rq, - struct bio *bio) +static inline bool blk_mq_can_use_cached_rq(struct request *rq, struct bio *bio) { if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type) return false; @@ -2565,7 +2565,6 @@ static inline struct request *blk_mq_get_request(struct request_queue *q, bool checked = false; if (plug) { - rq = rq_list_peek(&plug->cached_rq); if (rq && rq->q == q) { if (unlikely(!submit_bio_checks(bio))) @@ -2587,12 +2586,14 @@ static inline struct request *blk_mq_get_request(struct request_queue *q, fallback: if (unlikely(bio_queue_enter(bio))) return NULL; - if (!checked && !submit_bio_checks(bio)) - return NULL; + if (unlikely(!checked && !submit_bio_checks(bio))) + goto out_put; rq = blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq); - if (!rq) - blk_queue_exit(q); - return rq; + if (rq) + return rq; +out_put: + blk_queue_exit(q); + return NULL; } /** @@ -2647,8 +2648,10 @@ void blk_mq_submit_bio(struct bio *bio) return; } - if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq)) + if (op_is_flush(bio->bi_opf)) { + blk_insert_flush(rq); return; + } if (plug && (q->nr_hw_queues == 1 || blk_mq_is_shared_tags(rq->mq_hctx->flags) || @@ -4417,6 +4420,19 @@ unsigned int blk_mq_rq_cpu(struct request *rq) } EXPORT_SYMBOL(blk_mq_rq_cpu); +void blk_mq_cancel_work_sync(struct request_queue *q) +{ + if (queue_is_mq(q)) { + struct blk_mq_hw_ctx *hctx; + int i; + + cancel_delayed_work_sync(&q->requeue_work); + + queue_for_each_hw_ctx(q, hctx, i) + cancel_delayed_work_sync(&hctx->run_work); + } +} + static int __init blk_mq_init(void) { int i; diff --git a/block/blk-mq.h b/block/blk-mq.h index 8acfa650f575..afcf9931a489 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -128,6 +128,8 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); void blk_mq_free_plug_rqs(struct blk_plug *plug); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); +void blk_mq_cancel_work_sync(struct request_queue *q); + void blk_mq_release(struct request_queue *q); static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index cef1f713370b..cd75b0f73dc6 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -791,16 +791,6 @@ static void blk_release_queue(struct kobject *kobj) blk_free_queue_stats(q->stats); - if (queue_is_mq(q)) { - struct blk_mq_hw_ctx *hctx; - int i; - - cancel_delayed_work_sync(&q->requeue_work); - - queue_for_each_hw_ctx(q, hctx, i) - cancel_delayed_work_sync(&hctx->run_work); - } - blk_exit_queue(q); blk_queue_free_zone_bitmaps(q); diff --git a/block/blk.h b/block/blk.h index b4fed2033e48..ccde6e6f1736 100644 --- a/block/blk.h +++ b/block/blk.h @@ -271,7 +271,7 @@ void __blk_account_io_done(struct request *req, u64 now); */ #define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED) -bool blk_insert_flush(struct request *rq); +void blk_insert_flush(struct request *rq); int elevator_switch_mq(struct request_queue *q, struct elevator_type *new_e); diff --git a/block/elevator.c b/block/elevator.c index 1f39f6e8ebb9..19a78d5516ba 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -694,12 +694,18 @@ void elevator_init_mq(struct request_queue *q) if (!e) return; + /* + * We are called before adding disk, when there isn't any FS I/O, + * so freezing queue plus canceling dispatch work is enough to + * drain any dispatch activities originated from passthrough + * requests, then no need to quiesce queue which may add long boot + * latency, especially when lots of disks are involved. + */ blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); + blk_mq_cancel_work_sync(q); err = blk_mq_init_sched(q, e); - blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q); if (err) { diff --git a/block/fops.c b/block/fops.c index ad732a36f9b3..0da147edbd18 100644 --- a/block/fops.c +++ b/block/fops.c @@ -15,6 +15,7 @@ #include <linux/falloc.h> #include <linux/suspend.h> #include <linux/fs.h> +#include <linux/module.h> #include "blk.h" static inline struct inode *bdev_file_inode(struct file *file) @@ -340,8 +341,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, } else { ret = bio_iov_iter_get_pages(bio, iter); if (unlikely(ret)) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + bio_put(bio); return ret; } } diff --git a/block/genhd.c b/block/genhd.c index c5392cc24d37..30362aeacac4 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1111,6 +1111,8 @@ static void disk_release(struct device *dev) might_sleep(); WARN_ON_ONCE(disk_live(disk)); + blk_mq_cancel_work_sync(disk->queue); + disk_release_events(disk); kfree(disk->random); xa_destroy(&disk->part_tbl); diff --git a/block/ioprio.c b/block/ioprio.c index 0e4ff245f2bf..6f01d35a5145 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -69,7 +69,14 @@ int ioprio_check_cap(int ioprio) switch (class) { case IOPRIO_CLASS_RT: - if (!capable(CAP_SYS_NICE) && !capable(CAP_SYS_ADMIN)) + /* + * Originally this only checked for CAP_SYS_ADMIN, + * which was implicitly allowed for pid 0 by security + * modules such as SELinux. Make sure we check + * CAP_SYS_ADMIN first to avoid a denial/avc for + * possibly missing CAP_SYS_NICE permission. + */ + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE)) return -EPERM; fallthrough; /* rt has prio field too */ @@ -213,6 +220,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) pgrp = task_pgrp(current); else pgrp = find_vpid(who); + read_lock(&tasklist_lock); do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { tmpio = get_task_ioprio(p); if (tmpio < 0) @@ -222,6 +230,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) else ret = ioprio_best(ret, tmpio); } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); + read_unlock(&tasklist_lock); + break; case IOPRIO_WHO_USER: uid = make_kuid(current_user_ns(), who); diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index a85c351589be..b62c87b8ce4a 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -998,7 +998,14 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); - struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx]; + struct cpc_register_resource *reg; + + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpunum); + return -ENODEV; + } + + reg = &cpc_desc->cpc_regs[reg_idx]; if (CPC_IN_PCC(reg)) { int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 7cd0009e7ff3..ef104809f27b 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -347,28 +347,3 @@ void acpi_device_notify_remove(struct device *dev) acpi_unbind_one(dev); } - -int acpi_dev_turn_off_if_unused(struct device *dev, void *not_used) -{ - struct acpi_device *adev = to_acpi_device(dev); - - /* - * Skip device objects with device IDs, because they may be in use even - * if they are not companions of any physical device objects. - */ - if (adev->pnp.type.hardware_id) - return 0; - - mutex_lock(&adev->physical_node_lock); - - /* - * Device objects without device IDs are not in use if they have no - * corresponding physical device objects. - */ - if (list_empty(&adev->physical_node_list)) - acpi_device_set_power(adev, ACPI_STATE_D3_COLD); - - mutex_unlock(&adev->physical_node_lock); - - return 0; -} diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 8fbdc172864b..d91b560e8867 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -117,7 +117,6 @@ bool acpi_device_is_battery(struct acpi_device *adev); bool acpi_device_is_first_physical_node(struct acpi_device *adev, const struct device *dev); int acpi_bus_register_early_device(int type); -int acpi_dev_turn_off_if_unused(struct device *dev, void *not_used); /* -------------------------------------------------------------------------- Device Matching and Notification diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index e312ebaed8db..2366f54d8e9c 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1084,21 +1084,17 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, * Returns parent node of an ACPI device or data firmware node or %NULL if * not available. */ -struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode) +static struct fwnode_handle * +acpi_node_get_parent(const struct fwnode_handle *fwnode) { if (is_acpi_data_node(fwnode)) { /* All data nodes have parent pointer so just return that */ return to_acpi_data_node(fwnode)->parent; } else if (is_acpi_device_node(fwnode)) { - acpi_handle handle, parent_handle; - - handle = to_acpi_device_node(fwnode)->handle; - if (ACPI_SUCCESS(acpi_get_parent(handle, &parent_handle))) { - struct acpi_device *adev; + struct device *dev = to_acpi_device_node(fwnode)->dev.parent; - if (!acpi_bus_get_device(parent_handle, &adev)) - return acpi_fwnode_handle(adev); - } + if (dev) + return acpi_fwnode_handle(to_acpi_device(dev)); } return NULL; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index a50f1967c73d..2c80765670bc 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2564,12 +2564,6 @@ int __init acpi_scan_init(void) } } - /* - * Make sure that power management resources are not blocked by ACPI - * device objects with no users. - */ - bus_for_each_dev(&acpi_bus_type, NULL, NULL, acpi_dev_turn_off_if_unused); - acpi_turn_off_unused_power_resources(); acpi_scan_initialized = true; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 49fb74196d02..c75fb600740c 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2710,7 +2710,7 @@ static void binder_transaction(struct binder_proc *proc, t->from = thread; else t->from = NULL; - t->sender_euid = proc->cred->euid; + t->sender_euid = task_euid(proc->tsk); t->to_proc = target_proc; t->to_thread = target_thread; t->code = tr->code; @@ -4422,23 +4422,20 @@ static int binder_thread_release(struct binder_proc *proc, __release(&t->lock); /* - * If this thread used poll, make sure we remove the waitqueue - * from any epoll data structures holding it with POLLFREE. - * waitqueue_active() is safe to use here because we're holding - * the inner lock. + * If this thread used poll, make sure we remove the waitqueue from any + * poll data structures holding it. */ - if ((thread->looper & BINDER_LOOPER_STATE_POLL) && - waitqueue_active(&thread->wait)) { - wake_up_poll(&thread->wait, EPOLLHUP | POLLFREE); - } + if (thread->looper & BINDER_LOOPER_STATE_POLL) + wake_up_pollfree(&thread->wait); binder_inner_proc_unlock(thread->proc); /* - * This is needed to avoid races between wake_up_poll() above and - * and ep_remove_waitqueue() called for other reasons (eg the epoll file - * descriptor being closed); ep_remove_waitqueue() holds an RCU read - * lock, so we can be sure it's done after calling synchronize_rcu(). + * This is needed to avoid races between wake_up_pollfree() above and + * someone else removing the last entry from the queue for other reasons + * (e.g. ep_remove_wait_queue() being called due to an epoll file + * descriptor being closed). Such other users hold an RCU read lock, so + * we can be sure they're done after we call synchronize_rcu(). */ if (thread->looper & BINDER_LOOPER_STATE_POLL) synchronize_rcu(); diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index d60f34718b5d..1e1167e725a4 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -438,6 +438,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { /* AMD */ { PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */ { PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */ + { PCI_VDEVICE(AMD, 0x7901), board_ahci_mobile }, /* AMD Green Sardine */ /* AMD is using RAID class only for ahci controllers */ { PCI_VENDOR_ID_AMD, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci }, diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index 50b56cd0039d..e9c7c07fd84c 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -94,6 +94,7 @@ struct ceva_ahci_priv { static unsigned int ceva_ahci_read_id(struct ata_device *dev, struct ata_taskfile *tf, u16 *id) { + __le16 *__id = (__le16 *)id; u32 err_mask; err_mask = ata_do_dev_read_id(dev, tf, id); @@ -103,7 +104,7 @@ static unsigned int ceva_ahci_read_id(struct ata_device *dev, * Since CEVA controller does not support device sleep feature, we * need to clear DEVSLP (bit 8) in word78 of the IDENTIFY DEVICE data. */ - id[ATA_ID_FEATURE_SUPP] &= cpu_to_le16(~(1 << 8)); + __id[ATA_ID_FEATURE_SUPP] &= cpu_to_le16(~(1 << 8)); return 0; } diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 8a6835bfd18a..f76b8418e6fb 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -2323,6 +2323,18 @@ int ahci_port_resume(struct ata_port *ap) EXPORT_SYMBOL_GPL(ahci_port_resume); #ifdef CONFIG_PM +static void ahci_handle_s2idle(struct ata_port *ap) +{ + void __iomem *port_mmio = ahci_port_base(ap); + u32 devslp; + + if (pm_suspend_via_firmware()) + return; + devslp = readl(port_mmio + PORT_DEVSLP); + if ((devslp & PORT_DEVSLP_ADSE)) + ata_msleep(ap, devslp_idle_timeout); +} + static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg) { const char *emsg = NULL; @@ -2336,6 +2348,9 @@ static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg) ata_port_freeze(ap); } + if (acpi_storage_d3(ap->host->dev)) + ahci_handle_s2idle(ap); + ahci_rpm_put_port(ap); return rc; } diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8a0ccb190d76..aba0c67d1bd6 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2031,8 +2031,9 @@ retry: dev->horkage |= ATA_HORKAGE_NO_DMA_LOG; goto retry; } - ata_dev_err(dev, "Read log page 0x%02x failed, Emask 0x%x\n", - (unsigned int)page, err_mask); + ata_dev_err(dev, + "Read log 0x%02x page 0x%02x failed, Emask 0x%x\n", + (unsigned int)log, (unsigned int)page, err_mask); } return err_mask; @@ -2177,6 +2178,9 @@ static void ata_dev_config_ncq_prio(struct ata_device *dev) struct ata_port *ap = dev->link->ap; unsigned int err_mask; + if (!ata_identify_page_supported(dev, ATA_LOG_SATA_SETTINGS)) + return; + err_mask = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE, ATA_LOG_SATA_SETTINGS, @@ -2453,7 +2457,8 @@ static void ata_dev_config_devslp(struct ata_device *dev) * Check device sleep capability. Get DevSlp timing variables * from SATA Settings page of Identify Device Data Log. */ - if (!ata_id_has_devslp(dev->id)) + if (!ata_id_has_devslp(dev->id) || + !ata_identify_page_supported(dev, ATA_LOG_SATA_SETTINGS)) return; err_mask = ata_read_log_page(dev, @@ -3915,6 +3920,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "VRFDFC22048UCHC-TE*", NULL, ATA_HORKAGE_NODMA }, /* Odd clown on sil3726/4726 PMPs */ { "Config Disk", NULL, ATA_HORKAGE_DISABLE }, + /* Similar story with ASMedia 1092 */ + { "ASMT109x- Config", NULL, ATA_HORKAGE_DISABLE }, /* Weird ATAPI devices */ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 4e88597aa9df..b9c77885b872 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -827,7 +827,7 @@ static ssize_t ata_scsi_lpm_show(struct device *dev, if (ap->target_lpm_policy >= ARRAY_SIZE(ata_lpm_policy_names)) return -EINVAL; - return snprintf(buf, PAGE_SIZE, "%s\n", + return sysfs_emit(buf, "%s\n", ata_lpm_policy_names[ap->target_lpm_policy]); } DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR, @@ -922,7 +922,7 @@ DEVICE_ATTR(ncq_prio_enable, S_IRUGO | S_IWUSR, ata_ncq_prio_enable_show, ata_ncq_prio_enable_store); EXPORT_SYMBOL_GPL(dev_attr_ncq_prio_enable); -struct attribute *ata_ncq_sdev_attrs[] = { +static struct attribute *ata_ncq_sdev_attrs[] = { &dev_attr_unload_heads.attr, &dev_attr_ncq_prio_enable.attr, &dev_attr_ncq_prio_supported.attr, diff --git a/drivers/ata/pata_falcon.c b/drivers/ata/pata_falcon.c index 121635aa8c00..823c88622e34 100644 --- a/drivers/ata/pata_falcon.c +++ b/drivers/ata/pata_falcon.c @@ -55,14 +55,14 @@ static unsigned int pata_falcon_data_xfer(struct ata_queued_cmd *qc, /* Transfer multiple of 2 bytes */ if (rw == READ) { if (swap) - raw_insw_swapw((u16 *)data_addr, (u16 *)buf, words); + raw_insw_swapw(data_addr, (u16 *)buf, words); else - raw_insw((u16 *)data_addr, (u16 *)buf, words); + raw_insw(data_addr, (u16 *)buf, words); } else { if (swap) - raw_outsw_swapw((u16 *)data_addr, (u16 *)buf, words); + raw_outsw_swapw(data_addr, (u16 *)buf, words); else - raw_outsw((u16 *)data_addr, (u16 *)buf, words); + raw_outsw(data_addr, (u16 *)buf, words); } /* Transfer trailing byte, if any. */ @@ -74,16 +74,16 @@ static unsigned int pata_falcon_data_xfer(struct ata_queued_cmd *qc, if (rw == READ) { if (swap) - raw_insw_swapw((u16 *)data_addr, (u16 *)pad, 1); + raw_insw_swapw(data_addr, (u16 *)pad, 1); else - raw_insw((u16 *)data_addr, (u16 *)pad, 1); + raw_insw(data_addr, (u16 *)pad, 1); *buf = pad[0]; } else { pad[0] = *buf; if (swap) - raw_outsw_swapw((u16 *)data_addr, (u16 *)pad, 1); + raw_outsw_swapw(data_addr, (u16 *)pad, 1); else - raw_outsw((u16 *)data_addr, (u16 *)pad, 1); + raw_outsw(data_addr, (u16 *)pad, 1); } words++; } diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index e5838b23c9e0..3b31a4f596d8 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1394,6 +1394,14 @@ static int sata_fsl_init_controller(struct ata_host *host) return 0; } +static void sata_fsl_host_stop(struct ata_host *host) +{ + struct sata_fsl_host_priv *host_priv = host->private_data; + + iounmap(host_priv->hcr_base); + kfree(host_priv); +} + /* * scsi mid-layer and libata interface structures */ @@ -1426,6 +1434,8 @@ static struct ata_port_operations sata_fsl_ops = { .port_start = sata_fsl_port_start, .port_stop = sata_fsl_port_stop, + .host_stop = sata_fsl_host_stop, + .pmp_attach = sata_fsl_pmp_attach, .pmp_detach = sata_fsl_pmp_detach, }; @@ -1480,9 +1490,9 @@ static int sata_fsl_probe(struct platform_device *ofdev) host_priv->ssr_base = ssr_base; host_priv->csr_base = csr_base; - irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (!irq) { - dev_err(&ofdev->dev, "invalid irq from platform\n"); + irq = platform_get_irq(ofdev, 0); + if (irq < 0) { + retval = irq; goto error_exit_with_cleanup; } host_priv->irq = irq; @@ -1557,10 +1567,6 @@ static int sata_fsl_remove(struct platform_device *ofdev) ata_host_detach(host); - irq_dispose_mapping(host_priv->irq); - iounmap(host_priv->hcr_base); - kfree(host_priv); - return 0; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a154cab6cd98..c3a36cfaa855 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2103,7 +2103,7 @@ static int loop_control_remove(int idx) int ret; if (idx < 0) { - pr_warn("deleting an unspecified loop device is not supported.\n"); + pr_warn_once("deleting an unspecified loop device is not supported.\n"); return -EINVAL; } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 97bf051a50ce..6ae38776e30e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -316,7 +316,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req = bd->rq; struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); unsigned long flags; - unsigned int num; + int num; int qid = hctx->queue_num; bool notify = false; blk_status_t status; @@ -1049,7 +1049,6 @@ static struct virtio_driver virtio_blk = { .feature_table_size = ARRAY_SIZE(features), .feature_table_legacy = features_legacy, .feature_table_size_legacy = ARRAY_SIZE(features_legacy), - .suppress_used_validation = true, .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 08d7953ec5f1..25071126995b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1853,12 +1853,14 @@ static const struct block_device_operations zram_devops = { .owner = THIS_MODULE }; +#ifdef CONFIG_ZRAM_WRITEBACK static const struct block_device_operations zram_wb_devops = { .open = zram_open, .submit_bio = zram_submit_bio, .swap_slot_free_notify = zram_slot_free_notify, .owner = THIS_MODULE }; +#endif static DEVICE_ATTR_WO(compact); static DEVICE_ATTR_RW(disksize); diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c index fb99e3727155..547e6e769546 100644 --- a/drivers/bus/mhi/core/pm.c +++ b/drivers/bus/mhi/core/pm.c @@ -881,7 +881,7 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl) } EXPORT_SYMBOL_GPL(mhi_pm_suspend); -int mhi_pm_resume(struct mhi_controller *mhi_cntrl) +static int __mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force) { struct mhi_chan *itr, *tmp; struct device *dev = &mhi_cntrl->mhi_dev->dev; @@ -898,8 +898,12 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl) if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) return -EIO; - if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) - return -EINVAL; + if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) { + dev_warn(dev, "Resuming from non M3 state (%s)\n", + TO_MHI_STATE_STR(mhi_get_mhi_state(mhi_cntrl))); + if (!force) + return -EINVAL; + } /* Notify clients about exiting LPM */ list_for_each_entry_safe(itr, tmp, &mhi_cntrl->lpm_chans, node) { @@ -940,8 +944,19 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl) return 0; } + +int mhi_pm_resume(struct mhi_controller *mhi_cntrl) +{ + return __mhi_pm_resume(mhi_cntrl, false); +} EXPORT_SYMBOL_GPL(mhi_pm_resume); +int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl) +{ + return __mhi_pm_resume(mhi_cntrl, true); +} +EXPORT_SYMBOL_GPL(mhi_pm_resume_force); + int __mhi_device_get_sync(struct mhi_controller *mhi_cntrl) { int ret; diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 59a4896a8030..4c577a731709 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -20,7 +20,7 @@ #define MHI_PCI_DEFAULT_BAR_NUM 0 -#define MHI_POST_RESET_DELAY_MS 500 +#define MHI_POST_RESET_DELAY_MS 2000 #define HEALTH_CHECK_PERIOD (HZ * 2) diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index ed3c4c42fc23..d68d05d5d383 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c @@ -281,7 +281,7 @@ agp_ioc_init(void __iomem *ioc_regs) return 0; } -static int +static int __init lba_find_capability(int cap) { struct _parisc_agp_info *info = &parisc_agp_info; @@ -366,7 +366,7 @@ fail: return error; } -static int +static int __init find_quicksilver(struct device *dev, void *data) { struct parisc_device **lba = data; @@ -378,7 +378,7 @@ find_quicksilver(struct device *dev, void *data) return 0; } -static int +static int __init parisc_agp_init(void) { extern struct sba_device *sba_list; diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index deed355422f4..c837d5416e0e 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -191,6 +191,8 @@ struct ipmi_user { struct work_struct remove_work; }; +static struct workqueue_struct *remove_work_wq; + static struct ipmi_user *acquire_ipmi_user(struct ipmi_user *user, int *index) __acquires(user->release_barrier) { @@ -1297,7 +1299,7 @@ static void free_user(struct kref *ref) struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount); /* SRCU cleanup must happen in task context. */ - schedule_work(&user->remove_work); + queue_work(remove_work_wq, &user->remove_work); } static void _ipmi_destroy_user(struct ipmi_user *user) @@ -3918,9 +3920,11 @@ static int handle_ipmb_direct_rcv_cmd(struct ipmi_smi *intf, /* We didn't find a user, deliver an error response. */ ipmi_inc_stat(intf, unhandled_commands); - msg->data[0] = ((netfn + 1) << 2) | (msg->rsp[4] & 0x3); - msg->data[1] = msg->rsp[2]; - msg->data[2] = msg->rsp[4] & ~0x3; + msg->data[0] = (netfn + 1) << 2; + msg->data[0] |= msg->rsp[2] & 0x3; /* rqLUN */ + msg->data[1] = msg->rsp[1]; /* Addr */ + msg->data[2] = msg->rsp[2] & ~0x3; /* rqSeq */ + msg->data[2] |= msg->rsp[0] & 0x3; /* rsLUN */ msg->data[3] = cmd; msg->data[4] = IPMI_INVALID_CMD_COMPLETION_CODE; msg->data_size = 5; @@ -4455,13 +4459,24 @@ return_unspecified: msg->rsp[2] = IPMI_ERR_UNSPECIFIED; msg->rsp_size = 3; } else if (msg->type == IPMI_SMI_MSG_TYPE_IPMB_DIRECT) { - /* commands must have at least 3 bytes, responses 4. */ - if (is_cmd && (msg->rsp_size < 3)) { + /* commands must have at least 4 bytes, responses 5. */ + if (is_cmd && (msg->rsp_size < 4)) { |