aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-03-25 10:11:38 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-03-25 10:11:38 -0700
commitaa5b537b0ecc16992577b013f11112d54c7ce869 (patch)
treeac9f6ce6c8c5b4722501cb36e95b3c0a35aa933e
parentMerge tag 's390-5.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux (diff)
parentRISC-V: add support for restartable sequences (diff)
downloadlinux-dev-aa5b537b0ecc16992577b013f11112d54c7ce869.tar.xz
linux-dev-aa5b537b0ecc16992577b013f11112d54c7ce869.zip
Merge tag 'riscv-for-linus-5.18-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux
Pull RISC-V updates from Palmer Dabbelt: - Support for Sv57-based virtual memory. - Various improvements for the MicroChip PolarFire SOC and the associated Icicle dev board, which should allow upstream kernels to boot without any additional modifications. - An improved memmove() implementation. - Support for the new Ssconfpmf and SBI PMU extensions, which allows for a much more useful perf implementation on RISC-V systems. - Support for restartable sequences. * tag 'riscv-for-linus-5.18-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (36 commits) rseq/selftests: Add support for RISC-V RISC-V: Add support for restartable sequence MAINTAINERS: Add entry for RISC-V PMU drivers Documentation: riscv: Remove the old documentation RISC-V: Add sscofpmf extension support RISC-V: Add perf platform driver based on SBI PMU extension RISC-V: Add RISC-V SBI PMU extension definitions RISC-V: Add a simple platform driver for RISC-V legacy perf RISC-V: Add a perf core library for pmu drivers RISC-V: Add CSR encodings for all HPMCOUNTERS RISC-V: Remove the current perf implementation RISC-V: Improve /proc/cpuinfo output for ISA extensions RISC-V: Do no continue isa string parsing without correct XLEN RISC-V: Implement multi-letter ISA extension probing framework RISC-V: Extract multi-letter extension names from "riscv, isa" RISC-V: Minimal parser for "riscv, isa" strings RISC-V: Correctly print supported extensions riscv: Fixed misaligned memory access. Fixed pointer comparison. MAINTAINERS: update riscv/microchip entry riscv: dts: microchip: add new peripherals to icicle kit device tree ...
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,mpfs.yaml58
-rw-r--r--Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml79
-rw-r--r--Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml (renamed from Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml)6
-rw-r--r--Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml81
-rw-r--r--Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml58
-rw-r--r--Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml40
-rw-r--r--Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml35
-rw-r--r--Documentation/riscv/pmu.rst255
-rw-r--r--MAINTAINERS11
-rw-r--r--arch/riscv/Kconfig18
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi25
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts115
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi254
-rw-r--r--arch/riscv/configs/defconfig1
-rw-r--r--arch/riscv/configs/rv32_defconfig1
-rw-r--r--arch/riscv/include/asm/csr.h67
-rw-r--r--arch/riscv/include/asm/fixmap.h1
-rw-r--r--arch/riscv/include/asm/hwcap.h26
-rw-r--r--arch/riscv/include/asm/page.h1
-rw-r--r--arch/riscv/include/asm/perf_event.h72
-rw-r--r--arch/riscv/include/asm/pgalloc.h49
-rw-r--r--arch/riscv/include/asm/pgtable-64.h106
-rw-r--r--arch/riscv/include/asm/pgtable.h6
-rw-r--r--arch/riscv/include/asm/sbi.h95
-rw-r--r--arch/riscv/kernel/Makefile1
-rw-r--r--arch/riscv/kernel/cpu.c70
-rw-r--r--arch/riscv/kernel/cpufeature.c130
-rw-r--r--arch/riscv/kernel/entry.S4
-rw-r--r--arch/riscv/kernel/perf_event.c485
-rw-r--r--arch/riscv/kernel/signal.c2
-rw-r--r--arch/riscv/lib/memmove.S368
-rw-r--r--arch/riscv/mm/init.c168
-rw-r--r--arch/riscv/mm/kasan_init.c155
-rw-r--r--drivers/perf/Kconfig30
-rw-r--r--drivers/perf/Makefile3
-rw-r--r--drivers/perf/riscv_pmu.c324
-rw-r--r--drivers/perf/riscv_pmu_legacy.c142
-rw-r--r--drivers/perf/riscv_pmu_sbi.c790
-rw-r--r--include/dt-bindings/clock/microchip,mpfs-clock.h45
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/perf/riscv_pmu.h75
-rw-r--r--tools/testing/selftests/rseq/param_test.c23
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv.h677
-rw-r--r--tools/testing/selftests/rseq/rseq.h2
44 files changed, 3900 insertions, 1055 deletions
diff --git a/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml b/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml
new file mode 100644
index 000000000000..0c15afa2214c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/microchip,mpfs.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip PolarFire Clock Control Module Binding
+
+maintainers:
+ - Daire McNamara <daire.mcnamara@microchip.com>
+
+description: |
+ Microchip PolarFire clock control (CLKCFG) is an integrated clock controller,
+ which gates and enables all peripheral clocks.
+
+ This device tree binding describes 33 gate clocks. Clocks are referenced by
+ user nodes by the CLKCFG node phandle and the clock index in the group, from
+ 0 to 32.
+
+properties:
+ compatible:
+ const: microchip,mpfs-clkcfg
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+ description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/microchip,mpfs-clock.h
+ for the full list of PolarFire clock IDs.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock Config node:
+ - |
+ #include <dt-bindings/clock/microchip,mpfs-clock.h>
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ clkcfg: clock-controller@20002000 {
+ compatible = "microchip,mpfs-clkcfg";
+ reg = <0x0 0x20002000 0x0 0x1000>;
+ clocks = <&ref>;
+ #clock-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml b/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml
new file mode 100644
index 000000000000..110651eafa70
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/microchip,mpfs-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip MPFS GPIO Controller Device Tree Bindings
+
+maintainers:
+ - Conor Dooley <conor.dooley@microchip.com>
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - microchip,mpfs-gpio
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description:
+ Interrupt mapping, one per GPIO. Maximum 32 GPIOs.
+ minItems: 1
+ maxItems: 32
+
+ interrupt-controller: true
+
+ clocks:
+ maxItems: 1
+
+ "#gpio-cells":
+ const: 2
+
+ "#interrupt-cells":
+ const: 1
+
+ ngpios:
+ description:
+ The number of GPIOs available.
+ minimum: 1
+ maximum: 32
+ default: 32
+
+ gpio-controller: true
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - "#interrupt-cells"
+ - interrupt-controller
+ - "#gpio-cells"
+ - gpio-controller
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ gpio@20122000 {
+ compatible = "microchip,mpfs-gpio";
+ reg = <0x20122000 0x1000>;
+ clocks = <&clkcfg 25>;
+ interrupt-parent = <&plic>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupts = <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml
index bbb173ea483c..082d397d3e89 100644
--- a/Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml
+++ b/Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/mailbox/microchip,polarfire-soc-mailbox.yaml#"
+$id: "http://devicetree.org/schemas/mailbox/microchip,mpfs-mailbox.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) mailbox controller
@@ -11,7 +11,7 @@ maintainers:
properties:
compatible:
- const: microchip,polarfire-soc-mailbox
+ const: microchip,mpfs-mailbox
reg:
items:
@@ -38,7 +38,7 @@ examples:
#address-cells = <2>;
#size-cells = <2>;
mbox: mailbox@37020000 {
- compatible = "microchip,polarfire-soc-mailbox";
+ compatible = "microchip,mpfs-mailbox";
reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318c 0x0 0x40>;
interrupt-parent = <&L1>;
interrupts = <96>;
diff --git a/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml b/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml
new file mode 100644
index 000000000000..a7fae1772a81
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/microchip,corepwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip IP corePWM controller bindings
+
+maintainers:
+ - Conor Dooley <conor.dooley@microchip.com>
+
+description: |
+ corePWM is an 16 channel pulse width modulator FPGA IP
+
+ https://www.microsemi.com/existing-parts/parts/152118
+
+allOf:
+ - $ref: pwm.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: microchip,corepwm-rtl-v4
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ "#pwm-cells":
+ const: 2
+
+ microchip,sync-update-mask:
+ description: |
+ Depending on how the IP is instantiated, there are two modes of operation.
+ In synchronous mode, all channels are updated at the beginning of the PWM period,
+ and in asynchronous mode updates happen as the control registers are written.
+ A 16 bit wide "SHADOW_REG_EN" parameter of the IP core controls whether synchronous
+ mode is possible for each channel, and is set by the bitstream programmed to the
+ FPGA. If the IP core is instantiated with SHADOW_REG_ENx=1, both registers that
+ control the duty cycle for channel x have a second "shadow"/buffer reg synthesised.
+ At runtime a bit wide register exposed to APB can be used to toggle on/off
+ synchronised mode for all channels it has been synthesised for.
+ Each bit of "microchip,sync-update-mask" corresponds to a PWM channel & represents
+ whether synchronous mode is possible for the PWM channel.
+
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0
+
+ microchip,dac-mode-mask:
+ description: |
+ Optional, per-channel Low Ripple DAC mode is possible on this IP core. It creates
+ a minimum period pulse train whose High/Low average is that of the chosen duty
+ cycle. This "DAC" will have far better bandwidth and ripple performance than the
+ standard PWM algorithm can achieve. A 16 bit DAC_MODE module parameter of the IP
+ core, set at instantiation and by the bitstream programmed to the FPGA, determines
+ whether a given channel operates in regular PWM or DAC mode.
+ Each bit corresponds to a PWM channel & represents whether DAC mode is enabled
+ for that channel.
+
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0
+
+required:
+ - compatible
+ - reg
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ pwm@41000000 {
+ compatible = "microchip,corepwm-rtl-v4";
+ microchip,sync-update-mask = /bits/ 32 <0>;
+ clocks = <&clkcfg 30>;
+ reg = <0x41000000 0xF0>;
+ #pwm-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml b/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml
new file mode 100644
index 000000000000..a2e984ea3553
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/microchip,mfps-rtc.yaml#
+
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip PolarFire Soc (MPFS) RTC Device Tree Bindings
+
+allOf:
+ - $ref: rtc.yaml#
+
+maintainers:
+ - Daire McNamara <daire.mcnamara@microchip.com>
+ - Lewis Hanly <lewis.hanly@microchip.com>
+
+properties:
+ compatible:
+ enum:
+ - microchip,mpfs-rtc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: |
+ RTC_WAKEUP interrupt
+ - description: |
+ RTC_MATCH, asserted when the content of the Alarm register is equal
+ to that of the RTC's count register.
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: rtc
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ rtc@20124000 {
+ compatible = "microchip,mpfs-rtc";
+ reg = <0x20124000 0x1000>;
+ clocks = <&clkcfg 21>;
+ clock-names = "rtc";
+ interrupts = <80>, <81>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml
new file mode 100644
index 000000000000..b0dae51e1d42
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/soc/microchip/microchip,mpfs-sys-controller.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) system controller
+
+maintainers:
+ - Conor Dooley <conor.dooley@microchip.com>
+
+description: |
+ PolarFire SoC devices include a microcontroller acting as the system controller,
+ which provides "services" to the main processor and to the FPGA fabric. These
+ services include hardware rng, reprogramming of the FPGA and verfification of the
+ eNVM contents etc. More information on these services can be found online, at
+ https://onlinedocs.microchip.com/pr/GUID-1409CF11-8EF9-4C24-A94E-70979A688632-en-US-1/index.html
+
+ Communication with the system controller is done via a mailbox, of which the client
+ portion is documented here.
+
+properties:
+ mboxes:
+ maxItems: 1
+
+ compatible:
+ const: microchip,mpfs-sys-controller
+
+required:
+ - compatible
+ - mboxes
+
+additionalProperties: false
+
+examples:
+ - |
+ syscontroller {
+ compatible = "microchip,mpfs-sys-controller";
+ mboxes = <&mbox 0>;
+ };
diff --git a/Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml b/Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml
deleted file mode 100644
index 2cd3bc6bd8d6..000000000000
--- a/Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-%YAML 1.2
----
-$id: "http://devicetree.org/schemas/soc/microchip/microchip,polarfire-soc-sys-controller.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
-
-title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) system controller
-
-maintainers:
- - Conor Dooley <conor.dooley@microchip.com>
-
-description: |
- The PolarFire SoC system controller is communicated with via a mailbox.
- This document describes the bindings for the client portion of that mailbox.
-
-
-properties:
- mboxes:
- maxItems: 1
-
- compatible:
- const: microchip,polarfire-soc-sys-controller
-
-required:
- - compatible
- - mboxes
-
-additionalProperties: false
-
-examples:
- - |
- syscontroller: syscontroller {
- compatible = "microchip,polarfire-soc-sys-controller";
- mboxes = <&mbox 0>;
- };
diff --git a/Documentation/riscv/pmu.rst b/Documentation/riscv/pmu.rst
deleted file mode 100644
index acb216b99c26..000000000000
--- a/Documentation/riscv/pmu.rst
+++ /dev/null
@@ -1,255 +0,0 @@
-===================================
-Supporting PMUs on RISC-V platforms
-===================================
-
-Alan Kao <alankao@andestech.com>, Mar 2018
-
-Introduction
-------------
-
-As of this writing, perf_event-related features mentioned in The RISC-V ISA
-Privileged Version 1.10 are as follows:
-(please check the manual for more details)
-
-* [m|s]counteren
-* mcycle[h], cycle[h]
-* minstret[h], instret[h]
-* mhpeventx, mhpcounterx[h]
-
-With such function set only, porting perf would require a lot of work, due to
-the lack of the following general architectural performance monitoring features:
-
-* Enabling/Disabling counters
- Counters are just free-running all the time in our case.
-* Interrupt caused by counter overflow
- No such feature in the spec.
-* Interrupt indicator
- It is not possible to have many interrupt ports for all counters, so an
- interrupt indicator is required for software to tell which counter has
- just overflowed.
-* Writing to counters
- There will be an SBI to support this since the kernel cannot modify the
- counters [1]. Alternatively, some vendor considers to implement
- hardware-extension for M-S-U model machines to write counters directly.
-
-This document aims to provide developers a quick guide on supporting their
-PMUs in the kernel. The following sections briefly explain perf' mechanism
-and todos.
-
-You may check previous discussions here [1][2]. Also, it might be helpful
-to check the appendix for related kernel structures.
-
-
-1. Initialization
------------------
-
-*riscv_pmu* is a global pointer of type *struct riscv_pmu*, which contains
-various methods according to perf's internal convention and PMU-specific
-parameters. One should declare such instance to represent the PMU. By default,
-*riscv_pmu* points to a constant structure *riscv_base_pmu*, which has very
-basic support to a baseline QEMU model.
-
-Then he/she can either assign the instance's pointer to *riscv_pmu* so that
-the minimal and already-implemented logic can be leveraged, or invent his/her
-own *riscv_init_platform_pmu* implementation.
-
-In other words, existing sources of *riscv_base_pmu* merely provide a
-reference implementation. Developers can flexibly decide how many parts they
-can leverage, and in the most extreme case, they can customize every function
-according to their needs.
-
-
-2. Event Initialization
------------------------
-
-When a user launches a perf command to monitor some events, it is first
-interpreted by the userspace perf tool into multiple *perf_event_open*
-system calls, and then each of them calls to the body of *event_init*
-member function that was assigned in the previous step. In *riscv_base_pmu*'s
-case, it is *riscv_event_init*.
-
-The main purpose of this function is to translate the event provided by user
-into bitmap, so that HW-related control registers or counters can directly be
-manipulated. The translation is based on the mappings and methods provided in
-*riscv_pmu*.
-
-Note that some features can be done in this stage as well:
-
-(1) interrupt setting, which is stated in the next section;
-(2) privilege level setting (user space only, kernel space only, both);
-(3) destructor setting. Normally it is sufficient to apply *riscv_destroy_event*;
-(4) tweaks for non-sampling events, which will be utilized by functions such as
- *perf_adjust_period*, usually something like the follows::
-
- if (!is_sampling_event(event)) {
- hwc->sample_period = x86_pmu.max_period;
- hwc->last_period = hwc->sample_period;
- local64_set(&hwc->period_left, hwc->sample_period);
- }
-
-In the case of *riscv_base_pmu*, only (3) is provided for now.
-
-
-3. Interrupt
-------------
-
-3.1. Interrupt Initialization
-
-This often occurs at the beginning of the *event_init* method. In common
-practice, this should be a code segment like::
-
- int x86_reserve_hardware(void)
- {
- int err = 0;
-
- if (!atomic_inc_not_zero(&pmc_refcount)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&pmc_refcount) == 0) {
- if (!reserve_pmc_hardware())
- err = -EBUSY;
- else
- reserve_ds_buffers();
- }
- if (!err)
- atomic_inc(&pmc_refcount);
- mutex_unlock(&pmc_reserve_mutex);
- }
-
- return err;
- }
-
-And the magic is in *reserve_pmc_hardware*, which usually does atomic
-operations to make implemented IRQ accessible from some global function pointer.
-*release_pmc_hardware* serves the opposite purpose, and it is used in event
-destructors mentioned in previous section.
-
-(Note: From the implementations in all the architectures, the *reserve/release*
-pair are always IRQ settings, so the *pmc_hardware* seems somehow misleading.
-It does NOT deal with the binding between an event and a physical counter,
-which will be introduced in the next section.)
-
-3.2. IRQ Structure
-
-Basically, a IRQ runs the following pseudo code::
-
- for each hardware counter that triggered this overflow
-
- get the event of this counter
-
- // following two steps are defined as *read()*,
- // check the section Reading/Writing Counters for details.
- count the delta value since previous interrupt
- update the event->count (# event occurs) by adding delta, and
- event->hw.period_left by subtracting delta
-
- if the event overflows
- sample data
- set the counter appropriately for the next overflow
-
- if the event overflows again
- too frequently, throttle this event
- fi
- fi
-
- end for
-
-However as of this writing, none of the RISC-V implementations have designed an
-interrupt for perf, so the details are to be completed in the future.
-
-4. Reading/Writing Counters
----------------------------
-
-They seem symmetric but perf treats them quite differently. For reading, there
-is a *read* interface in *struct pmu*, but it serves more than just reading.
-According to the context, the *read* function not only reads the content of the
-counter (event->count), but also updates the left period to the next interrupt
-(event->hw.period_left).
-
-But the core of perf does not need direct write to counters. Writing counters
-is hidden behind the abstraction of 1) *pmu->start*, literally start counting so one
-has to set the counter to a good value for the next interrupt; 2) inside the IRQ
-it should set the counter to the same resonable value.
-
-Reading is not a problem in RISC-V but writing would need some effort, since
-counters are not allowed to be written by S-mode.
-
-
-5. add()/del()/start()/stop()
------------------------------
-
-Basic idea: add()/del() adds/deletes events to/from a PMU, and start()/stop()
-starts/stop the counter of some event in the PMU. All of them take the same
-arguments: *struct perf_event *event* and *int flag*.
-
-Consider perf as a state machine, then you will find that these functions serve
-as the state transition process between those states.
-Three states (event->hw.state) are defined:
-
-* PERF_HES_STOPPED: the counter is stopped
-* PERF_HES_UPTODATE: the event->count is up-to-date
-* PERF_HES_ARCH: arch-dependent usage ... we don't need this for now
-
-A normal flow of these state transitions are as follows:
-
-* A user launches a perf event, resulting in calling to *event_init*.
-* When being context-switched in, *add* is called by the perf core, with a flag
- PERF_EF_START, which means that the event should be started after it is added.
- At this stage, a general event is bound to a physical counter, if any.
- The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, because it is now
- stopped, and the (software) event count does not need updating.
-
- - *start* is then called, and the counter is enabled.
- With flag PERF_EF_RELOAD, it writes an appropriate value to the counter (check
- previous section for detail).
- Nothing is written if the flag does not contain PERF_EF_RELOAD.
- The state now is reset to none, because it is neither stopped nor updated
- (the counting already started)
-
-* When being context-switched out, *del* is called. It then checks out all the
- events in the PMU and calls *stop* to update their counts.
-
- - *stop* is called by *del*
- and the perf core with flag PERF_EF_UPDATE, and it often shares the same
- subroutine as *read* with the same logic.
- The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, again.
-
- - Life cycle of these two pairs: *add* and *del* are called repeatedly as
- tasks switch in-and-out; *start* and *stop* is also called when the perf core
- needs a quick stop-and-start, for instance, when the interrupt period is being
- adjusted.
-
-Current implementation is sufficient for now and can be easily extended to
-features in the future.
-
-A. Related Structures
----------------------
-
-* struct pmu: include/linux/perf_event.h
-* struct riscv_pmu: arch/riscv/include/asm/perf_event.h
-
- Both structures are designed to be read-only.
-
- *struct pmu* defines some function pointer interfaces, and most of them take
- *struct perf_event* as a main argument, dealing with perf events according to
- perf's internal state machine (check kernel/events/core.c for details).
-
- *struct riscv_pmu* defines PMU-specific parameters. The naming follows the
- convention of all other architectures.
-
-* struct perf_event: include/linux/perf_event.h
-* struct hw_perf_event
-
- The generic structure that represents perf events, and the hardware-related
- details.
-
-* struct riscv_hw_events: arch/riscv/include/asm/perf_event.h
-
- The structure that holds the status of events, has two fixed members:
- the number of events and the array of the events.
-
-References
-----------
-
-[1] https://github.com/riscv/riscv-linux/pull/124
-
-[2] https://groups.google.com/a/groups.riscv.org/forum/#!topic/sw-dev/f19TmCNP6yA
diff --git a/MAINTAINERS b/MAINTAINERS
index e1931e84f87d..51e1fd934432 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16707,6 +16707,15 @@ S: Maintained
F: drivers/mtd/nand/raw/r852.c
F: drivers/mtd/nand/raw/r852.h
+RISC-V PMU DRIVERS
+M: Atish Patra <atishp@atishpatra.org>
+R: Anup Patel <anup@brainfault.org>
+L: linux-riscv@lists.infradead.org
+S: Supported
+F: drivers/perf/riscv_pmu.c
+F: drivers/perf/riscv_pmu_legacy.c
+F: drivers/perf/riscv_pmu_sbi.c
+
RISC-V ARCHITECTURE
M: Paul Walmsley <paul.walmsley@sifive.com>
M: Palmer Dabbelt <palmer@dabbelt.com>
@@ -16721,8 +16730,10 @@ K: riscv
RISC-V/MICROCHIP POLARFIRE SOC SUPPORT
M: Lewis Hanly <lewis.hanly@microchip.com>
+M: Conor Dooley <conor.dooley@microchip.com>
L: linux-riscv@lists.infradead.org
S: Supported
+F: arch/riscv/boot/dts/microchip/
F: drivers/mailbox/mailbox-mpfs.c
F: drivers/soc/microchip/
F: include/soc/microchip/mpfs.h
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 0804b9a11934..ea8ec8a960bd 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -102,6 +102,7 @@ config RISCV
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_RSEQ
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_RELA if MODULES
@@ -152,7 +153,7 @@ config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT
default 0x80000000 if 64BIT && !MMU
- default 0xffffaf8000000000 if 64BIT
+ default 0xff60000000000000 if 64BIT
config KASAN_SHADOW_OFFSET
hex
@@ -200,7 +201,7 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
- default 4 if 64BIT
+ default 5 if 64BIT
default 2
config LOCKDEP_SUPPORT
@@ -331,19 +332,6 @@ config RISCV_ISA_C
If you don't know what to do here, say Y.
-menu "supported PMU type"
- depends on PERF_EVENTS
-
-config RISCV_BASE_PMU
- bool "Base Performance Monitoring Unit"
- def_bool y
- help
- A base PMU that serves as a reference implementation and has limited
- feature of perf. It can run on any RISC-V machines so serves as the
- fallback, but this option can also be disable to reduce kernel size.
-
-endmenu
-
config FPU
bool "FPU support"
default y
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi
new file mode 100644
index 000000000000..854320e17b28
--- /dev/null
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020-2021 Microchip Technology Inc */
+
+/ {
+ core_pwm0: pwm@41000000 {
+ compatible = "microchip,corepwm-rtl-v4";
+ reg = <0x0 0x41000000 0x0 0xF0>;
+ microchip,sync-update-mask = /bits/ 32 <0>;
+ #pwm-cells = <2>;
+ clocks = <&clkcfg CLK_FIC3>;
+ status = "disabled";
+ };
+
+ i2c2: i2c@44000000 {
+ compatible = "microchip,corei2c-rtl-v7";
+ reg = <0x0 0x44000000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&clkcfg CLK_FIC3>;
+ interrupt-parent = <&plic>;
+ interrupts = <122>;
+ clock-frequency = <100000>;
+ status = "disabled";
+ };
+};
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
index 0c748ae1b006..cd2fe80fa81a 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
-/* Copyright (c) 2020 Microchip Technology Inc */
+/* Copyright (c) 2020-2021 Microchip Technology Inc */
/dts-v1/;
@@ -13,25 +13,34 @@
compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs";
aliases {
- ethernet0 = &emac1;
- serial0 = &serial0;
- serial1 = &serial1;
- serial2 = &serial2;
- serial3 = &serial3;
+ ethernet0 = &mac1;
+ serial0 = &mmuart0;
+ serial1 = &mmuart1;
+ serial2 = &mmuart2;
+ serial3 = &mmuart3;
+ serial4 = &mmuart4;
};
chosen {
- stdout-path = "serial0:115200n8";
+ stdout-path = "serial1:115200n8";
};
cpus {
timebase-frequency = <RTCCLK_FREQ>;
};
- memory@80000000 {
+ ddrc_cache_lo: memory@80000000 {
device_type = "memory";
- reg = <0x0 0x80000000 0x0 0x40000000>;
- clocks = <&clkcfg 26>;
+ reg = <0x0 0x80000000 0x0 0x2e000000>;
+ clocks = <&clkcfg CLK_DDRC>;
+ status = "okay";
+ };
+
+ ddrc_cache_hi: memory@1000000000 {
+ device_type = "memory";
+ reg = <0x10 0x0 0x0 0x40000000>;
+ clocks = <&clkcfg CLK_DDRC>;
+ status = "okay";
};
};
@@ -39,19 +48,19 @@
clock-frequency = <600000000>;
};
-&serial0 {
+&mmuart1 {
status = "okay";
};
-&serial1 {
+&mmuart2 {
status = "okay";
};
-&serial2 {
+&mmuart3 {
status = "okay";
};
-&serial3 {
+&mmuart4 {
status = "okay";
};
@@ -61,28 +70,92 @@
bus-width = <4>;
disable-wp;
cap-sd-highspeed;
+ cap-mmc-highspeed;
card-detect-delay = <200>;
+ mmc-ddr-1_8v;
+ mmc-hs200-1_8v;
sd-uhs-sdr12;
sd-uhs-sdr25;
sd-uhs-sdr50;
sd-uhs-sdr104;
};
-&emac0 {
+&spi0 {
+ status = "okay";
+};
+
+&spi1 {
+ status = "okay";
+};
+
+&qspi {
+ status = "okay";
+};
+
+&i2c0 {
+ status = "okay";
+};
+
+&i2c1 {
+ status = "okay";
+};
+
+&i2c2 {
+ status = "okay";
+};
+
+&mac0 {
phy-mode = "sgmii";
phy-handle = <&phy0>;
- phy0: ethernet-phy@8 {
- reg = <8>;
- ti,fifo-depth = <0x01>;
- };
};
-&emac1 {
+&mac1 {
status = "okay";
phy-mode = "sgmii";
phy-handle = <&phy1>;
phy1: ethernet-phy@9 {
reg = <9>;
- ti,fifo-depth = <0x01>;
+ ti,fifo-depth = <0x1>;
};
+ phy0: ethernet-phy@8 {
+ reg = <8>;
+ ti,fifo-depth = <0x1>;
+ };
+};
+
+&gpio2 {
+ interrupts = <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>;
+ status = "okay";
+};
+
+&rtc {
+ status = "okay";
+};
+
+&usb {
+ status = "okay";
+ dr_mode = "host";
+};
+
+&mbox {
+ status = "okay";
+};
+
+&syscontroller {
+ status = "okay";
+};
+
+&pcie {
+ status = "okay";
+};
+
+&core_pwm0 {
+ status = "okay";
};
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
index 869aaf0d5c06..c5c9d1360de0 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
-/* Copyright (c) 2020 Microchip Technology Inc */
+/* Copyright (c) 2020-2021 Microchip Technology Inc */
/dts-v1/;
+#include "dt-bindings/clock/microchip,mpfs-clock.h"
+#include "microchip-mpfs-fabric.dtsi"
/ {
#address-cells = <2>;
@@ -13,8 +15,7 @@
#address-cells = <1>;
#size-cells = <0>;
- cpu@0 {
- clock-frequency = <0>;
+ cpu0: cpu@0 {
compatible = "sifive,e51", "sifive,rocket0", "riscv";
device_type = "cpu";
i-cache-block-size = <64>;
@@ -22,6 +23,7 @@
i-cache-size = <16384>;
reg = <0>;
riscv,isa = "rv64imac";
+ clocks = <&clkcfg CLK_CPU>;
status = "disabled";
cpu0_intc: interrupt-controller {
@@ -31,8 +33,7 @@
};
};
- cpu@1 {
- clock-frequency = <0>;
+ cpu1: cpu@1 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@@ -48,6 +49,7 @@
mmu-type = "riscv,sv39";
reg = <1>;
riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
@@ -58,8 +60,7 @@
};
};
- cpu@2 {
- clock-frequency = <0>;
+ cpu2: cpu@2 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@@ -75,6 +76,7 @@
mmu-type = "riscv,sv39";
reg = <2>;
riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
@@ -85,8 +87,7 @@
};
};
- cpu@3 {
- clock-frequency = <0>;
+ cpu3: cpu@3 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@@ -102,6 +103,7 @@
mmu-type = "riscv,sv39";
reg = <3>;
riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
@@ -112,8 +114,7 @@
};
};
- cpu@4 {
- clock-frequency = <0>;
+ cpu4: cpu@4 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@@ -129,6 +130,7 @@
mmu-type = "riscv,sv39";
reg = <4>;
riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
cpu4_intc: interrupt-controller {
@@ -150,8 +152,9 @@
compatible = "simple-bus";
ranges;
- cache-controller@2010000 {
+ cctrllr: cache-controller@2010000 {
compatible = "sifive,fu540-c000-ccache", "cache";
+ reg = <0x0 0x2010000 0x0 0x1000>;
cache-block-size = <64>;
cache-level = <2>;
cache-sets = <1024>;
@@ -159,10 +162,9 @@
cache-unified;
interrupt-parent = <&plic>;
interrupts = <1>, <2>, <3>;
- reg = <0x0 0x2010000 0x0 0x1000>;
};
- clint@2000000 {
+ clint: clint@2000000 {
compatible = "sifive,fu540-c000-clint", "sifive,clint0";
reg = <0x0 0x2000000 0x0 0xC000>;
interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>,
@@ -186,15 +188,6 @@
riscv,ndev = <186>;
};
- dma@3000000 {
- compatible = "sifive,fu540-c000-pdma";
- reg = <0x0 0x3000000 0x0 0x8000>;
- interrupt-parent = <&plic>;
- interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>,
- <30>;
- #dma-cells = <1>;
- };
-
clkcfg: clkcfg@20002000 {
compatible = "microchip,mpfs-clkcfg";
reg = <0x0 0x20002000 0x0 0x1000>;
@@ -202,7 +195,7 @@
#clock-cells = <1>;
};
- serial0: serial@20000000 {
+ mmuart0: serial@20000000 {
compatible = "ns16550a";
reg = <0x0 0x20000000 0x0 0x400>;
reg-io-width = <4>;
@@ -210,11 +203,11 @@
interrupt-parent = <&plic>;
interrupts = <90>;
current-speed = <115200>;
- clocks = <&clkcfg 8>;
- status = "disabled";
+ clocks = <&clkcfg CLK_MMUART0>;
+ status = "disabled"; /* Reserved for the HSS */
};
- serial1: serial@20100000 {
+ mmuart1: serial@20100000 {
compatible = "ns16550a";
reg = <0x0 0x20100000 0x0 0x400>;
reg-io-width = <4>;
@@ -222,11 +215,11 @@
interrupt-parent = <&plic>;
interrupts = <91>;
current-speed = <115200>;
- clocks = <&clkcfg 9>;
+ clocks = <&clkcfg CLK_MMUART1>;
status = "disabled";
};
- serial2: serial@20102000 {
+ mmuart2: serial@20102000 {
compatible = "ns16550a";
reg = <0x0 0x20102000 0x0 0x400>;
reg-io-width = <4>;
@@ -234,11 +227,11 @@
interrupt-parent = <&plic>;
interrupts = <92>;
current-speed = <115200>;
- clocks = <&clkcfg 10>;
+ clocks = <&clkcfg CLK_MMUART2>;
status = "disabled";
};
- serial3: serial@20104000 {
+ mmuart3: serial@20104000 {
compatible = "ns16550a";
reg = <0x0 0x20104000 0x0 0x400>;
reg-io-width = <4>;
@@ -246,7 +239,19 @@
interrupt-parent = <&plic>;
interrupts = <93>;
current-speed = <115200>;
- clocks = <&clkcfg 11>;
+ clocks = <&clkcfg CLK_MMUART3>;
+ status = "disabled";
+ };
+
+ mmuart4: serial@20106000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x20106000 0x0 0x400>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&plic>;
+ interrupts = <94>;
+ clocks = <&clkcfg CLK_MMUART4>;
+ current-speed = <115200>;
status = "disabled";
};
@@ -255,37 +260,196 @@
compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc";
reg = <0x0 0x20008000 0x0 0x1000>;
interrupt-parent = <&plic>;
- interrupts = <88>, <89>;
- clocks = <&clkcfg 6>;
+ interrupts = <88>;
+ clocks = <&clkcfg CLK_MMC>;
max-frequency = <200000000>;
status = "disabled";
};
- emac0: ethernet@20110000 {
+ spi0: spi@20108000 {
+ compatible = "microchip,mpfs-spi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x0 0x20108000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <54>;
+ clocks = <&clkcfg CLK_SPI0>;
+ spi-max-frequency = <25000000>;
+ status = "disabled";
+ };
+
+ spi1: spi@20109000 {
+ compatible = "microchip,mpfs-spi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x0 0x20109000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <55>;
+ clocks = <&clkcfg CLK_SPI1>;
+ spi-max-frequency = <25000000>;
+ status = "disabled";
+ };
+
+ qspi: spi@21000000 {
+ compatible = "microchip,mpfs-qspi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x0 0x21000000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <85>;
+ clocks = <&clkcfg CLK_QSPI>;
+ spi-max-frequency = <25000000>;
+ status = "disabled";
+ };
+
+ i2c0: i2c@2010a000 {
+ compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7";
+ reg = <0x0 0x2010a000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupt-parent = <&plic>;
+ interrupts = <58>;
+ clocks = <&clkcfg CLK_I2C0>;
+ clock-frequency = <100000>;
+ status = "disabled";
+ };
+
+ i2c1: i2c@2010b000 {
+ compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7";
+ reg = <0x0 0x2010b000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupt-parent = <&plic>;
+ interrupts = <61>;
+ clocks = <&clkcfg CLK_I2C1>;
+ clock-frequency = <100000>;
+ status = "disabled";
+ };
+
+ mac0: ethernet@20110000 {
compatible = "cdns,macb";
reg = <0x0 0x20110000 0x0 0x2000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupt-parent = <&plic>;
- interrupts = <64>, <65>, <66>, <67>;
+ interrupts = <64>, <65>, <66>, <67>, <68>, <69>;
local-mac-address = [00 00 00 00 00 00];
- clocks = <&clkcfg 4>, <&clkcfg 2>;
+ clocks = <&clkcfg CLK_MAC0>, <&clkcfg CLK_AHB>;
clock-names = "pclk", "hclk";
status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
};
- emac1: ethernet@20112000 {
+ mac1: ethernet@20112000 {
compatible = "cdns,macb";
reg = <0x0 0x20112000 0x0 0x2000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupt-parent = <&plic>;
- interrupts = <70>, <71>, <72>, <73>;
+ interrupts = <70>, <71>, <72>, <73>, <74>, <75>;
local-mac-address = [00 00 00 00 00 00];
- clocks = <&clkcfg 5>, <&clkcfg 2>;
- status = "disabled";
+ clocks = <&clkcfg CLK_MAC1>, <&clkcfg CLK_AHB>;
clock-names = "pclk", "hclk";
- #address-cells = <1>;
- #size-cells = <0>;
+ status = "disabled";
+ };
+
+ gpio0: gpio@20120000 {
+ compatible = "microchip,mpfs-gpio";
+ reg = <0x0 0x20120000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ clocks = <&clkcfg CLK_GPIO0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ status = "disabled";
+ };
+
+ gpio1: gpio@20121000 {
+ compatible = "microchip,mpfs-gpio";
+ reg = <000 0x20121000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ clocks = <&clkcfg CLK_GPIO1>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ status = "disabled";
+ };
+
+ gpio2: gpio@20122000 {
+ compatible = "microchip,mpfs-gpio";
+ reg = <0x0 0x20122000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ clocks = <&clkcfg CLK_GPIO2>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ status = "disabled";
+ };
+
+ rtc: rtc@20124000 {
+ compatible = "microchip,mpfs-rtc";
+ reg = <0x0 0x20124000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <80>, <81>;
+ clocks = <&clkcfg CLK_RTC>;
+ clock-names = "rtc";
+ status = "disabled";
+ };
+
+ usb: usb@20201000 {
+ compatible = "microchip,mpfs-musb";
+ reg = <0x0 0x20201000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <86>, <87>;
+ clocks = <&clkcfg CLK_USB>;
+ interrupt-names = "dma","mc";
+ status = "disabled";
};
+ pcie: pcie@2000000000 {
+ compatible = "microchip,pcie-host-1.0";
+ #address-cells = <0x3>;
+ #interrupt-cells = <0x1>;
+ #size-cells = <0x2>;
+ device_type = "pci";
+ reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>;
+ reg-names = "cfg", "apb";
+ bus-range = <0x0 0x7f>;
+ interrupt-parent = <&plic>;
+ interrupts = <119>;
+ interrupt-map = <0 0 0 1 &pcie_intc 0>,
+ <0 0 0 2 &pcie_intc 1>,
+ <0 0 0 3 &pcie_intc 2>,
+ <0 0 0 4 &pcie_intc 3>;
+ interrupt-map-mask = <0 0 0 7>;
+ clocks = <&clkcfg CLK_FIC0>, <&clkcfg CLK_FIC1>, <&clkcfg CLK_FIC3>;
+ clock-names = "fic0", "fic1", "fic3";
+ ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>;
+ msi-parent = <&pcie>;
+ msi-controller;
+ microchip,axi-m-atr0 = <0x10 0x0>;
+ status = "disabled";
+ pcie_intc: legacy-interrupt-controller {
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ };
+ };
+
+ mbox: mailbox@37020000 {
+ compatible = "microchip,mpfs-mailbox";
+ reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318C 0x0 0x40>;
+ interrupt-parent = <&plic>;
+ interrupts = <96>;
+ #mbox-cells = <1>;
+ status = "disabled";
+ };
+
+ syscontroller: syscontroller {
+ compatible = "microchip,mpfs-sys-controller";
+ mboxes = <&mbox 0>;
+ };
};
};
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index f120fcc43d0a..7cd10ded7bf8 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -108,6 +108,7 @@ CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 8b56a7f1eb06..e0e5c7c09ab8 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -100,6 +100,7 @@ CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index ae711692eec9..e935f27b10fd 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -47,6 +47,7 @@
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
#define SATP_MODE_48 _AC(0x9000000000000000, UL)
+#define SATP_MODE_57 _AC(0xa000000000000000, UL)
#define SATP_ASID_BITS 16
#define SATP_ASID_SHIFT 44
#define SATP_ASID_MASK _AC(0xFFFF, UL)
@@ -65,6 +66,7 @@
#define IRQ_S_EXT 9
#define IRQ_VS_EXT 10
#define IRQ_M_EXT 11
+#define IRQ_PMU_OVF 13
/* Exception causes */
#define EXC_INST_MISALIGNED 0
@@ -150,9 +152,69 @@
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
#define CSR_INSTRET 0xc02
+#define CSR_HPMCOUNTER3 0xc03
+#define CSR_HPMCOUNTER4 0xc04
+#define CSR_HPMCOUNTER5 0xc05
+#define CSR_HPMCOUNTER6 0xc06
+#define CSR_HPMCOUNTER7 0xc07
+#define CSR_HPMCOUNTER8 0xc08
+#define CSR_HPMCOUNTER9 0xc09
+#define CSR_HPMCOUNTER10 0xc0a
+#define CSR_HPMCOUNTER11 0xc0b
+#define CSR_HPMCOUNTER12 0xc0c
+#define CSR_HPMCOUNTER13 0xc0d
+#define CSR_HPMCOUNTER14 0xc0e
+#define CSR_HPMCOUNTER15 0xc0f
+#define CSR_HPMCOUNTER16 0xc10
+#define CSR_HPMCOUNTER17 0xc11
+#define CSR_HPMCOUNTER18 0xc12
+#define CSR_HPMCOUNTER19 0xc13
+#define CSR_HPMCOUNTER20 0xc14
+#define CSR_HPMCOUNTER21 0xc15
+#define CSR_HPMCOUNTER22 0xc16
+#define CSR_HPMCOUNTER23 0xc17
+#define CSR_HPMCOUNTER24 0xc18
+#define CSR_HPMCOUNTER25 0xc19
+#define CSR_HPMCOUNTER26 0xc1a
+#define CSR_HPMCOUNTER27 0xc1b
+#define CSR_HPMCOUNTER28 0xc1c
+#define CSR_HPMCOUNTER29 0xc1d
+#define CSR_HPMCOUNTER30 0xc1e
+#define CSR_HPMCOUNTER31 0xc1f
#define CSR_CYCLEH 0xc80
#define CSR_TIMEH 0xc81
#define CSR_INSTRETH 0xc82
+#define CSR_HPMCOUNTER3H 0xc83
+#define CSR_HPMCOUNTER4H 0xc84
+#define CSR_HPMCOUNTER5H 0xc85
+#define CSR_HPMCOUNTER6H 0xc86
+#define CSR_HPMCOUNTER7H 0xc87
+#define CSR_HPMCOUNTER8H 0xc88
+#define CSR_HPMCOUNTER9H 0xc89
+#define CSR_HPMCOUNTER10H 0xc8a
+#define CSR_HPMCOUNTER11H 0xc8b
+#define CSR_HPMCOUNTER12H 0xc8c
+#define CSR_HPMCOUNTER13H 0xc8d
+#define CSR_HPMCOUNTER14H 0xc8e
+#define CSR_HPMCOUNTER15H 0xc8f
+#define CSR_HPMCOUNTER16H 0xc90
+#define CSR_HPMCOUNTER17H 0xc91
+#define CSR_HPMCOUNTER18H 0xc92
+#define CSR_HPMCOUNTER19H 0xc93
+#define CSR_HPMCOUNTER20H 0xc94
+#define CSR_HPMCOUNTER21H 0xc95
+#define CSR_HPMCOUNTER22H 0xc96
+#define CSR_HPMCOUNTER23H 0xc97
+#define CSR_HPMCOUNTER24H 0xc98
+#define CSR_HPMCOUNTER25H 0xc99
+#define CSR_HPMCOUNTER26H 0xc9a
+#define CSR_HPMCOUNTER27H 0xc9b
+#define CSR_HPMCOUNTER28H 0xc9c
+#define CSR_HPMCOUNTER29H 0xc9d
+#define CSR_HPMCOUNTER30H 0xc9e
+#define CSR_HPMCOUNTER31H 0xc9f
+
+#define CSR_SSCOUNTOVF 0xda0
#define CSR_SSTATUS 0x100
#define CSR_SIE 0x104
@@ -240,7 +302,10 @@
# define RV_IRQ_SOFT IRQ_S_SOFT
# define RV_IRQ_TIMER IRQ_S_TIMER
# define RV_IRQ_EXT IRQ_S_EXT
-#endif /* CONFIG_RISCV_M_MODE */
+# define RV_IRQ_PMU IRQ_PMU_OVF
+# define SIP_LCOFIP (_AC(0x1, UL) << IRQ_PMU_OVF)
+
+#endif /* !CONFIG_RISCV_M_MODE */
/* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
#define IE_SIE (_AC(0x1, UL) << RV_IRQ_SOFT)
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 58a718573ad6..3cfece8b6568 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -25,6 +25,7 @@ enum fixed_addresses {
FIX_PTE,
FIX_PMD,
FIX_PUD,
+ FIX_P4D,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 5ce50468aff1..0734e42f74f2 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -34,7 +34,33 @@ extern unsigned long elf_hwcap;
#define RISCV_ISA_EXT_s ('s' - 'a')
#define RISCV_ISA_EXT_u ('u' - 'a')
+/*
+ * Increse this to higher value as kernel support more ISA extensions.
+ */
#define RISCV_ISA_EXT_MAX 64
+#define RISCV_ISA_EXT_NAME_LEN_MAX 32
+
+/* The base ID for multi-letter ISA extensions */
+#define RISCV_ISA_EXT_BASE 26
+
+/*
+ * This enum represent the logical ID for each multi-letter RISC-V ISA extension.
+ * The logical ID should start from RISCV_ISA_EXT_BASE and must not exceed
+ * RISCV_ISA_EXT_MAX. 0-25 range is reserved for single letter
+ * extensions while all the multi-letter extensions should define the next
+ * available logical extension id.
+ */
+enum riscv_isa_ext_id {
+ RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
+ RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
+};
+
+struct riscv_isa_ext_data {
+ /* Name of the extension displayed to userspace via /proc/cpuinfo */
+ char uprop[RISCV_ISA_EXT_NAME_LEN_MAX];
+ /* The logical ISA extension ID */
+ unsigned int isa_ext_id;
+};
unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 004372f8da54..1526e410e802 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -41,6 +41,7 @@
* By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
* define the PAGE_OFFSET value for SV39.
*/
+#define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL)
#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
index 062efd3a1d5d..d42c901f9a97 100644
--- a/arch/riscv/include/asm/perf_event.h
+++ b/arch/riscv/include/asm/perf_event.h
@@ -9,77 +9,5 @@
#define _ASM_RISCV_PERF_EVENT_H
#include <linux/perf_event.h>
-#include <linux/ptrace.h>
-#include <linux/interrupt.h>
-
-#ifdef CONFIG_RISCV_BASE_PMU
-#define RISCV_BASE_COUNTERS 2
-
-/*
- * The RISCV_MAX_COUNTERS parameter should be specified.
- */
-
-#define RISCV_MAX_COUNTERS 2
-
-/*
- * These are the indexes of bits in counteren register *minus* 1,
- * except for cycle. It would be coherent if it can directly mapped
- * to counteren bit definition, but there is a *time* register at
- * counteren[1]. Per-cpu structure is scarce resource here.
- *
- * According to the spec, an implementation can support counter up to
- * mhpmcounter31, but many high-end processors has at most 6 general
- * PMCs, we give the definition to MHPMCOUNTER8 here.
- */
-#define RISCV_PMU_CYCLE 0
-#define RISCV_PMU_INSTRET 1
-#define RISCV_PMU_MHPMCOUNTER3 2
-#define RISCV_PMU_MHPMCOUNTER4 3
-#define RISCV_PMU_MHPMCOUNTER5 4
-#define RISCV_PMU_MHPMCOUNTER6 5
-#define RISCV_PMU_MHPMCOUNTER7 6
-#define RISCV_PMU_MHPMCOUNTER8 7
-
-#define RISCV_OP_UNSUPP (-EOPNOTSUPP)
-
-struct cpu_hw_events {
- /* # currently enabled events*/
- int n_events;
- /* currently enabled events */
- struct perf_event *events[RISCV_MAX_COUNTERS];
- /* vendor-defined PMU data */
- void *platform;
-};
-
-struct riscv_pmu {
- struct pmu *pmu;
-
- /* generic hw/cache events table */
- const int *hw_events;
- const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX];
- /* method used to map hw/cache events */
- int (*map_hw_event)(u64 config);
- int (*map_cache_event)(u64 config);
-
- /* max generic hw events in map */
- int max_events;
- /* number total counters, 2(base) + x(general) */
- int num_counters;
- /* the width of the counter */
- int counter_width;
-
- /* vendor-defined PMU features */
- void *platform;
-
- irqreturn_t (*handle_irq)(int irq_num, void *dev);
- int irq;
-};
-
-#endif
-#ifdef CONFIG_PERF_EVENTS
#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
-#endif
-
#endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 11823004b87a..947f23d7b6af 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
}
}
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+ if (pgtable_l5_enabled) {
+ unsigned long pfn = virt_to_pfn(p4d);
+
+ set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
+ p4d_t *p4d)
+{
+ if (pgtable_l5_enabled) {
+ unsigned long pfn = virt_to_pfn(p4d);
+
+ set_pgd_safe(pgd,
+ __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
#define pud_alloc_one pud_alloc_one
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
@@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
}
#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
+
+#define p4d_alloc_one p4d_alloc_one
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ if (pgtable_l5_enabled) {
+ gfp_t gfp = GFP_PGTABLE_USER;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ return (p4d_t *)get_zeroed_page(gfp);
+ }
+
+ return NULL;
+}
+
+static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+ free_page((unsigned long)p4d);
+}
+
+#define p4d_free p4d_free
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ if (pgtable_l5_enabled)
+ __p4d_free(mm, p4d);
+}
+
+#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
#endif /* __PAGETABLE_PMD_FOLDED */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index bbbdd66e5e2f..7e246e9f8d70 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -9,16 +9,24 @@
#include <linux/const.h>
extern bool pgtable_l4_enabled;
+extern bool pgtable_l5_enabled;
#define PGDIR_SHIFT_L3 30
#define PGDIR_SHIFT_L4 39
+#define PGDIR_SHIFT_L5 48
#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)
-#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
+#define PGDIR_SHIFT (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
+ (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+/* p4d is folded into pgd in case of 4-level page table */
+#define P4D_SHIFT 39
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE - 1))
+
/* pud is folded into pgd in case of 3-level page table */
#define PUD_SHIFT 30
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
@@ -29,6 +37,15 @@ extern bool pgtable_l4_enabled;
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
+/* Page 4th Directory entry */
+typedef struct {
+ unsigned long p4d;
+} p4d_t;
+
+#define p4d_val(x) ((x).p4d)
+#define __p4d(x) ((p4d_t) { (x) })
+#define PTRS_PER_P4D (PAGE_SIZE / sizeof(p4d_t))
+
/* Page Upper Directory entry */
typedef struct {
unsigned long pud;
@@ -99,6 +116,15 @@ static inline struct page *pud_page(pud_t pud)
return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
}
+#define mm_p4d_folded mm_p4d_folded
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+ if (pgtable_l5_enabled)
+ return false;
+
+ return true;
+}
+
#define mm_pud_folded mm_pud_folded
static inline bool mm_pud_folded(struct mm_struct *mm)
{
@@ -128,6 +154,9 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
#define pud_ERROR(e) \
pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+#define p4d_ERROR(e) \
+ pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e))
+
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
if (pgtable_l4_enabled)
@@ -166,6 +195,16 @@ static inline void p4d_clear(p4d_t *p4d)
set_p4d(p4d, __p4d(0));
}
+static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot)
+{
+ return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+}
+
+static inline unsigned long _p4d_pfn(p4d_t p4d)
+{
+ return p4d_val(p4d) >> _PAGE_PFN_SHIFT;
+}
+
static inline pud_t *p4d_pgtable(p4d_t p4d)
{
if (pgtable_l4_enabled)
@@ -173,6 +212,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
}
+#define p4d_page_vaddr(p4d) ((unsigned long)p4d_pgtable(p4d))
static inline struct page *p4d_page(p4d_t p4d)
{
@@ -190,4 +230,68 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
return (pud_t *)p4d;
}
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ *pgdp = pgd;
+ else
+ set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
+}
+
+static inline int pgd_none(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return (pgd_val(pgd) == 0);
+
+ return 0;
+}
+
+static inline int pgd_present(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return (pgd_val(pgd) & _PAGE_PRESENT);
+
+ return 1;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return !pgd_present(pgd);
+
+ return 0;
+}
+
+static inline void pgd_clear(pgd_t *pgd)
+{
+ if (pgtable_l5_enabled)
+ set_pgd(pgd, __pgd(0));
+}
+
+static inline p4d_t *pgd_pgtable(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+
+ return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
+}
+#define pgd_page_vaddr(pgd) ((unsigned long)pgd_pgtable(pgd))
+
+static inline struct page *pgd_page(pgd_t pgd)
+{
+ return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+}
+#define pgd_page(pgd) pgd_page(pgd)
+
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+#define p4d_offset p4d_offset
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+{
+ if (pgtable_l5_enabled)
+ return pgd_pgtable(*pgd) + p4d_index(address);
+
+ return (p4d_t *)pgd;
+}
+
#endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index e3549e50de95..046b44225623 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -63,7 +63,8 @@
* position vmemmap directly below the VMALLOC region.
*/
#ifdef CONFIG_64BIT
-#define VA_BITS (pgtable_l4_enabled ? 48 : 39)
+#define VA_BITS (pgtable_l5_enabled ? \
+ 57 : (pgtable_l4_enabled ? 48 : 39))
#else
#define VA_BITS 32
#endif
@@ -103,7 +104,6 @@
#ifndef __ASSEMBLY__
-#include <asm-generic/pgtable-nop4d.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
@@ -134,6 +134,8 @@ struct pt_alloc_ops {
phys_addr_t (*alloc_pmd)(uintptr_t va);
pud_t *(*get_pud_virt)(phys_addr_t pa);
phys_addr_t (*alloc_pud)(uintptr_t va);
+ p4d_t *(*get_p4d_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_p4d)(uintptr_t va);
#endif
};
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 06133b4f8e20..9e3c2cf1edaf 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -29,6 +29,7 @@ enum sbi_ext_id {
SBI_EXT_RFENCE = 0x52464E43,
SBI_EXT_HSM = 0x48534D,
SBI_EXT_SRST = 0x53525354,
+ SBI_EXT_PMU = 0x504D55,
/* Experimentals extensions must lie within this range */
SBI_EXT_EXPERIMENTAL_START = 0x08000000,
@@ -112,6 +113,98 @@ enum sbi_srst_reset_reason {
SBI_SRST_RESET_REASON_SYS_FAILURE,
};
+enum sbi_ext_pmu_fid {
+ SBI_EXT_PMU_NUM_COUNTERS = 0,
+ SBI_EXT_PMU_COUNTER_GET_INFO,
+ SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ SBI_EXT_PMU_COUNTER_START,
+ SBI_EXT_PMU_COUNTER_STOP,
+ SBI_EXT_PMU_COUNTER_FW_READ,
+};
+
+#define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(55, 0)
+#define RISCV_PMU_RAW_EVENT_IDX 0x20000
+
+/** General pmu event codes specified in SBI PMU extension */
+enum sbi_pmu_hw_generic_events_t {
+ SBI_PMU_HW_NO_EVENT = 0,
+ SBI_PMU_HW_CPU_CYCLES = 1,
+ SBI_PMU_HW_INSTRUCTIONS = 2,
+ SBI_PMU_HW_CACHE_REFERENCES = 3,
+ SBI_PMU_HW_CACHE_MISSES = 4,
+ SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5,
+ SBI_PMU_HW_BRANCH_MISSES = 6,
+ SBI_PMU_HW_BUS_CYCLES = 7,
+ SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8,
+ SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9,
+ SBI_PMU_HW_REF_CPU_CYCLES = 10,
+
+ SBI_PMU_HW_GENERAL_MAX,
+};
+
+/**
+ * Special "firmware" events provided by the firmware, even if the hardware
+ * does not support performance events. These events are encoded as a raw
+ * event type in Linux kernel perf framework.
+ */
+enum sbi_pmu_fw_generic_events_t {
+ SBI_PMU_FW_MISALIGNED_LOAD = 0,
+ SBI_PMU_FW_MISALIGNED_STORE = 1,
+ SBI_PMU_FW_ACCESS_LOAD = 2,
+ SBI_PMU_FW_ACCESS_STORE = 3,
+ SBI_PMU_FW_ILLEGAL_INSN = 4,
+ SBI_PMU_FW_SET_TIMER = 5,
+ SBI_PMU_FW_IPI_SENT = 6,
+ SBI_PMU_FW_IPI_RECVD = 7,
+ SBI_PMU_FW_FENCE_I_SENT = 8,
+ SBI_PMU_FW_FENCE_I_RECVD = 9,
+ SBI_PMU_FW_SFENCE_VMA_SENT = 10,
+ SBI_PMU_FW_SFENCE_VMA_RCVD = 11,
+ SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12,
+ SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13,
+
+ SBI_PMU_FW_HFENCE_GVMA_SENT = 14,
+ SBI_PMU_FW_HFENCE_GVMA_RCVD = 15,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17,
+
+ SBI_PMU_FW_HFENCE_VVMA_SENT = 18,
+ SBI_PMU_FW_HFENCE_VVMA_RCVD = 19,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21,
+ SBI_PMU_FW_MAX,
+};
+
+/* SBI PMU event types */
+enum sbi_pmu_event_type {
+ SBI_PMU_EVENT_TYPE_HW = 0x0,
+ SBI_PMU_EVENT_TYPE_CACHE = 0x1,
+ SBI_PMU_EVENT_TYPE_RAW = 0x2,
+ SBI_PMU_EVENT_TYPE_FW = 0xf,
+};
+
+/* SBI PMU event types */
+enum sbi_pmu_ctr_type {
+ SBI_PMU_CTR_TYPE_HW = 0x0,
+ SBI_PMU_CTR_TYPE_FW,
+};
+
+/* Flags defined for config matching function */
+#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0)
+#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1)
+#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2)
+#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3)
+#define SBI_PMU_CFG_FLAG_SET_VSNH (1 << 4)
+#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5)
+#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6)
+#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7)
+
+/* Flags defined for counter start function */
+#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0)
+
+/* Flags defined for counter stop function */
+#define SBI_PMU_STOP_FLAG_RESET (1 << 0)
+
#define SBI_SPEC_VERSION_DEFAULT 0x1
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
@@ -125,6 +218,8 @@ enum sbi_srst_reset_reason {
#define SBI_ERR_DENIED -4
#define SBI_ERR_INVALID_ADDRESS -5
#define SBI_ERR_ALREADY_AVAILABLE -6
+#define SBI_ERR_ALREADY_STARTED -7
+#define SBI_ERR_ALREADY_STOPPED -8
extern unsigned long sbi_spec_version;
struct sbiret {
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index ffc87e76b1dd..e0133d113216 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -53,7 +53,6 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
-obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
obj-$(CONFIG_RISCV_SBI) += sbi.o
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index ad0a7e9f828b..d2a936195295 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -6,6 +6,7 @@
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/of.h>
+#include <asm/hwcap.h>
#include <asm/smp.h>
#include <asm/pgtable.h>
@@ -63,12 +64,73 @@ int riscv_of_parent_hartid(struct device_node *node)
}
#ifdef CONFIG_PROC_FS
+#define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \
+ { \
+ .uprop = #UPROP, \
+ .isa_ext_id = EXTID, \
+ }
+/**
+ * Here are the ordering rules of extension naming defined by RISC-V
+ * specification :
+ * 1. All extensions should be separated from other multi-letter extensions
+ * from other multi-letter extensions by an underscore.
+ * 2. The first letter following the 'Z' conventionally indicates the most
+ * closely related alphabetical extension category, IMAFDQLCBKJTPVH.
+ * If multiple 'Z' extensions are named, they should be ordered first
+ * by category, then alphabetically within a category.
+ * 3. Standard supervisor-level extensions (starts with 'S') should be
+ * listed after standard unprivileged extensions. If multiple
+ * supervisor-level extensions are listed, they should be ordered
+ * alphabetically.
+ * 4. Non-standard extensions (starts with 'X') must be listed after all
+ * standard extensions. They must be separated from other multi-letter
+ * extensions by an underscore.
+ */
+static struct riscv_isa_ext_data isa_ext_arr[] = {
+ __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
+ __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
+};
+
+static void print_isa_ext(struct seq_file *f)
+{
+ struct riscv_isa_ext_data *edata;
+ int i = 0, arr_sz;
+
+ arr_sz = ARRAY_SIZE(isa_ext_arr) - 1;
+
+ /* No extension support available */
+ if (arr_sz <= 0)
+ return;
+
+ for (i = 0; i <= arr_sz; i++) {
+ edata = &isa_ext_arr[i];
+ if (!__riscv_isa_extension_available(NULL, edata->isa_ext_id))
+ continue;
+ seq_printf(f, "_%s", edata->uprop);
+ }
+}
+
+/**
+ * These are the only valid base (single letter) ISA extensions as per the spec.
+ * It also specifies the canonical order in which it appears in the spec.
+ * Some of the extension may just be a place holder for now (B, K, P, J).
+ * This should be updated once corresponding extensions are ratified.
+ */
+static const char base_riscv_exts[13] = "imafdqcbkjpvh";
static void print_isa(struct seq_file *f, const char *isa)
{
- /* Print the entire ISA as it is */
+ int i;
+
seq_puts(f, "isa\t\t: ");
- seq_write(f, isa, strlen(isa));
+ /* Print the rv[64/32] part */
+ seq_write(f, isa, 4);
+ for (i = 0; i < sizeof(base_riscv_exts); i++) {
+ if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a'))
+ /* Print only enabled the base ISA extensions */
+ seq_write(f, &base_riscv_exts[i], 1);
+ }
+ print_isa_ext(f);
seq_puts(f, "\n");
}
@@ -79,7 +141,9 @@ static void print_mmu(struct seq_file *f)
#if defined(CONFIG_32BIT)
strncpy(sv_type, "sv32", 5);
#elif defined(CONFIG_64BIT)
- if (pgtable_l4_enabled)
+ if (pgtable_l5_enabled)
+ strncpy(sv_type, "sv57", 5);
+ else if (pgtable_l4_enabled)
strncpy(sv_type, "sv48", 5);
else
strncpy(sv_type, "sv39", 5);
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index d959d207a40d..1b2d42d7f589 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -7,12 +7,15 @@
*/
#include <linux/bitmap.h>
+#include <linux/ctype.h>
#include <linux/of.h>
#include <asm/processor.h>
#include <asm/hwcap.h>
#include <asm/smp.h>
#include <asm/switch_to.h>
+#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
+
unsigned long elf_hwcap __read_mostly;
/* Host ISA bitmap */
@@ -63,8 +66,8 @@ void __init riscv_fill_hwcap(void)
{
struct device_node *node;
const char *isa;
- char print_str[BITS_PER_LONG + 1];
- size_t i, j, isa_len;
+ char print_str[NUM_ALPHA_EXTS + 1];
+ int i, j;
static unsigned long isa2hwcap[256] = {0};
isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
@@ -80,7 +83,8 @@ void __init riscv_fill_hwcap(void)
for_each_of_cpu_node(node) {
unsigned long this_hwcap = 0;
- unsigned long this_isa = 0;
+ DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX);
+ const char *temp;
if (riscv_of_processor_hartid(node) < 0)
continue;
@@ -90,23 +94,106 @@ void __init riscv_fill_hwcap(void)
continue;
}
- i = 0;
- isa_len = strlen(isa);
+ temp = isa;
#if IS_ENABLED(CONFIG_32BIT)
if (!strncmp(isa, "rv32", 4))
- i += 4;
+ isa += 4;
#elif IS_ENABLED(CONFIG_64BIT)
if (!strncmp(isa, "rv64", 4))
- i += 4;
+ isa += 4;
#endif
- for (; i < isa_len; ++i) {
- this_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
- /*
- * TODO: X, Y and Z extension parsing for Host ISA
- * bitmap will be added in-future.
- */
- if ('a' <= isa[i] && isa[i] < 'x')
- this_isa |= (1UL << (isa[i] - 'a'));
+ /* The riscv,isa DT property must start with rv64 or rv32 */
+ if (temp == isa)
+ continue;
+ bitmap_zero(this_isa, RISCV_ISA_EXT_MAX);
+ for (; *isa; ++isa) {
+ const char *ext = isa++;
+ const char *ext_end = isa;
+ bool ext_long = false, ext_err = false;
+
+ switch (*ext) {
+ case 's':
+ /**
+ * Workaround for invalid single-letter 's' & 'u'(QEMU).
+ * No need to set the bit in riscv_isa as 's' & 'u' are
+ * not valid ISA extensions. It works until multi-letter
+ * extension starting with "Su" appears.
+ */
+ if (ext[-1] != '_' && ext[1] == 'u') {
+ ++isa;
+ ext_err = true;
+ break;
+ }
+ fallthrough;
+ case 'x':
+ case 'z':
+ ext_long = true;
+ /* Multi-letter extension must be delimited */
+ for (; *isa && *isa != '_'; ++isa)
+ if (unlikely(!islower(*isa)
+ && !isdigit(*isa)))
+ ext_err = true;
+ /* Parse backwards */
+ ext_end = isa;
+ if (unlikely(ext_err))
+ break;
+ if (!isdigit(ext_end[-1]))
+ break;
+ /* Skip the minor version */
+ while (isdigit(*--ext_end))
+ ;
+ if (ext_end[0] != 'p'
+ || !isdigit(ext_end[-1])) {
+ /* Advance it to offset the pre-decrement */
+ ++ext_end;
+ break;
+ }
+ /* Skip the major version */
+ while (isdigit(*--ext_end))
+ ;
+ ++ext_end;
+ break;
+ default:
+ if (unlikely(!islower(*ext))) {
+ ext_err = true;
+ break;
+ }
+ /* Find next extension */
+ if (!isdigit(*isa))
+ break;
+ /* Skip the minor version */
+ while (isdigit(*++isa))
+ ;
+ if (*isa != 'p')
+ break;
+ if (!isdigit(*++isa)) {
+ --isa;
+ break;
+ }
+ /* Skip the major version */
+ while (isdigit(*++isa))
+ ;
+ break;
+ }
+ if (*isa != '_')
+ --isa;
+
+#define SET_ISA_EXT_MAP(name, bit) \
+ do { \
+ if ((ext_end - ext == sizeof(name) - 1) && \
+ !memcmp(ext, name, sizeof(name) - 1)) \
+ set_bit(bit, this_isa); \
+ } while (false) \
+
+ if (unlikely(ext_err))
+ continue;
+ if (!ext_long) {
+ this_hwcap |= isa2hwcap[(unsigned char)(*ext)];
+ set_bit(*ext - 'a', this_isa);
+ } else {
+ SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
+ }
+#undef SET_ISA_EXT_MAP
}
/*
@@ -119,10 +206,11 @@ void __init riscv_fill_hwcap(void)
else
elf_hwcap = this_hwcap;
- if (riscv_isa[0])
- riscv_isa[0] &= this_isa;
+ if (bitmap_weight(riscv_isa, RISCV_ISA_EXT_MAX))
+ bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
else
- riscv_isa[0] = this_isa;
+ bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
+
}
/* We don't support systems with F but without D, so mask those out
@@ -133,13 +221,13 @@ void __init riscv_fill_hwcap(void)
}
memset(print_str, 0, sizeof(print_str));
- for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+ for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
if (riscv_isa[0] & BIT_MASK(i))
print_str[j++] = (char)('a' + i);
- pr_info("riscv: ISA extensions %s\n", print_str);
+ pr_info("riscv: base ISA extensions %s\n", print_str);
memset(print_str, 0, sizeof(print_str));
- for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+ for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
if (elf_hwcap & BIT_MASK(i))
print_str[j++] = (char)('a' + i);
pr_info("riscv: ELF capabilities %s\n", print_str);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index d6a46ed0bf05..c8b9ce274b9a 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -225,6 +225,10 @@ ret_from_syscall:
* (If it was configured with SECCOMP_RET_ERRNO/TRACE)
*/
ret_from_syscall_rejected:
+#ifdef CONFIG_DEBUG_RSEQ
+ move a0, sp
+ call rseq_syscall
+#endif
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_WORK
diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
deleted file mode 100644
index c835f0362d94..000000000000
--- a/arch/riscv/kernel/perf_event.c
+++ /dev/null
@@ -1,485 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2009 Jaswinder Singh Rajput
- * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
- * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
- * Copyright (C) 2009 Google, Inc., Stephane Eranian
- * Copyright 2014 Tilera Corporation. All Rights Reserved.
- * Copyright (C) 2018 Andes Technology Corporation
- *
- * Perf_events support for RISC-V platforms.
- *
- * Since the spec. (as of now, Priv-Spec 1.10) does not provide enough
- * functionality for perf event to fully work, this file provides
- * the very basic framework only.
- *
- * For platform portings, please check Documentations/riscv/pmu.txt.
- *
- * The Copyright line includes x86 and tile ones.
- */
-
-#include <linux/kprobes.h>
-#include <linux/kernel.h>
-#include <linux/kdebug.h>
-#include <linux/mutex.h>
-#include <linux/bitmap.h>
-#include <linux/irq.h>
-#include <linux/perf_event.h>
-#include <linux/atomic.h>
-#include <linux/of.h>
-#include <asm/perf_event.h>
-
-static const struct riscv_pmu *riscv_pmu __read_mostly;
-static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-
-/*
- * Hardware & cache maps and their methods
- */
-
-static const int riscv_hw_event_map[] = {
- [PERF_COUNT_HW_CPU_CYCLES] = RISCV_PMU_CYCLE,
- [PERF_COUNT_HW_INSTRUCTIONS] = RISCV_PMU_INSTRET,
- [PERF_COUNT_HW_CACHE_REFERENCES] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_CACHE_MISSES] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_BRANCH_MISSES] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_BUS_CYCLES] = RISCV_OP_UNSUPP,
-};
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
-[PERF_COUNT_HW_CACHE_OP_MAX]
-[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
-};
-
-static int riscv_map_hw_event(u64 config)
-{
- if (config >= riscv_pmu->max_events)
- return -EINVAL;
-
- return riscv_pmu->hw_events[config];
-}
-
-static int riscv_map_cache_decode(u64 config, unsigned int *type,
- unsigned int *op, unsigned int *result)
-{
- return -ENOENT;
-}
-
-static int riscv_map_cache_event(u64 config)
-{
- unsigned int type, op, result;
- int err = -ENOENT;
- int code;
-
- err = riscv_map_cache_decode(config, &type, &op, &result);
- if (!riscv_pmu->cache_events || err)
- return err;
-
- if (type >= PERF_COUNT_HW_CACHE_MAX ||
- op >= PERF_COUNT_HW_CACHE_OP_MAX ||
- result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
- return -EINVAL;
-
- code = (*riscv_pmu->cache_events)[type][op][result];
- if (code == RISCV_OP_UNSUPP)
- return -EINVAL;
-
- return code;
-}
-
-/*
- * Low-level functions: reading/writing counters
- */
-
-static inline u64 read_counter(int idx)
-{
- u64 val = 0;
-
- switch (idx) {
- case RISCV_PMU_CYCLE:
- val = csr_read(CSR_CYCLE);
- break;
- case RISCV_PMU_INSTRET:
- val = csr_read(CSR_INSTRET);
- break;
- default:
- WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS);
- return -EINVAL;
- }
-
- return val;
-}
-
-static inline void write_counter(int idx, u64 value)
-{
- /* currently not supported */
- WARN_ON_ONCE(1);
-}
-
-/*
- * pmu->read: read and update the counter
- *
- * Other architectures' implementation often have a xxx_perf_event_update
- * routine, which can return counter values when called in the IRQ, but
- * return void when being called by the pmu->read method.
- */
-static void riscv_pmu_read(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 prev_raw_count, new_raw_count;
- u64 oldval;
- int idx = hwc->idx;
- u64 delta;
-
- do {
- prev_raw_count = local64_read(&hwc->prev_count);
- new_raw_count = read_counter(idx);
-
- oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count);
- } while (oldval != prev_raw_count);
-
- /*
- * delta is the value to update the counter we maintain in the kernel.
- */
- delta = (new_raw_count - prev_raw_count) &
- ((1ULL << riscv_pmu->counter_width) - 1);
- local64_add(delta, &event->count);
- /*
- * Something like local64_sub(delta, &hwc->period_left) here is
- * needed if there is an interrupt for perf.
- */
-}
-
-/*
- * State transition functions:
- *
- * stop()/start() & add()/del()
- */
-
-/*
- * pmu->stop: stop the counter
- */
-static void riscv_pmu_stop(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
-
- if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- riscv_pmu->pmu->read(event);
- hwc->state |= PERF_HES_UPTODATE;
- }
-}
-
-/*
- * pmu->start: start the event.
- */
-static void riscv_pmu_start(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
- if (flags & PERF_EF_RELOAD) {
- WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-
- /*
- * Set the counter to the period to the next interrupt here,
- * if you have any.
- */
- }
-
- hwc->state = 0;
- perf_event_update_userpage(event);
-
- /*
- * Since we cannot write to counters, this serves as an initialization
- * to the delta-mechanism in pmu->read(); otherwise, the delta would be
- * wrong when pmu->read is called for the first time.
- */
- local64_set(&hwc->prev_count, read_counter(hwc->idx));
-}
-
-/*
- * pmu->add: add the event to PMU.
- */
-static int riscv_pmu_add(struct perf_event *event, int flags)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
-
- if (cpuc->n_events == riscv_pmu->num_counters)
- return -ENOSPC;
-
- /*
- * We don't have general conunters, so no binding-event-to-counter
- * process here.
- *
- * Indexing using hwc->config generally not works, since config may
- * contain extra information, but here the only info we have in
- * hwc->config is the event index.
- */
- hwc->idx = hwc->config;
- cpuc->events[hwc->idx] = event;
- cpuc->n_events++;
-
- hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
- if (flags & PERF_EF_START)
- riscv_pmu->pmu->start(event, PERF_EF_RELOAD);
-
- return 0;
-}
-
-/*
- * pmu->del: delete the event from PMU.
- */
-static void riscv_pmu_del(struct perf_event *event, int flags)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
-
- cpuc->events[hwc->idx] = NULL;
- cpuc->n_events--;
- riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
- perf_event_update_userpage(event);
-}
-
-/*
- * Interrupt: a skeletion for reference.
- */
-
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
-static irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev)
-{
- return IRQ_NONE;
-}
-
-static int reserve_pmc_hardware(void)
-{
- int err = 0;
-
- mutex_lock(&pmc_reserve_mutex);
- if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq) {
- err = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq,
- IRQF_PERCPU, "riscv-base-perf", NULL);
- }
- mutex_unlock(&pmc_reserve_mutex);
-
- return err;
-}
-
-static void release_pmc_hardware(void)
-{
- mutex_lock(&pmc_reserve_mutex);
- if (riscv_pmu->irq >= 0)
- free_irq(riscv_pmu->irq, NULL);
- mutex_unlock(&pmc_reserve_mutex);
-}
-
-/*
- * Event Initialization/Finalization
- */
-
-static atomic_t riscv_active_events = ATOMIC_INIT(0);
-
-static void riscv_event_destroy(struct perf_event *event)
-{
- if (atomic_dec_return(&riscv_active_events) == 0)
- release_pmc_hardware();
-}
-
-static int riscv_event_init(struct perf_event *event)
-{
- struct perf_event_attr *attr = &event->attr;
- struct hw_perf_event *hwc = &event->hw;
- int err;
- int code;
-
- if (atomic_inc_return(&riscv_active_events) == 1) {
- err = reserve_pmc_hardware();
-
- if (err) {
- pr_warn("PMC hardware not available\n");
- atomic_dec(&riscv_active_events);
- return -EBUSY;
- }
- }
-
- switch (event->attr.type) {
- case PERF_TYPE_HARDWARE:
- code = riscv_pmu->map_hw_event(attr->config);
- break;
- case PERF_TYPE_HW_CACHE:
- code = riscv_pmu->map_cache_event(attr->config);
- break;
- case PERF_TYPE_RAW:
- return -EOPNOTSUPP;
- default:
- return -ENOENT;
- }
-
- event->destroy = riscv_event_destroy;
- if (code < 0) {
- event->destroy(event);
- return code;
- }
-
- /*
- * idx is set to -1 because the index of a general event should not be
- * decided until binding to some counter in pmu->add().
- *
- * But since we don't have such support, later in pmu->add(), we just
- * use hwc->config as the index instead.
- */
- hwc->config = code;
- hwc->idx = -1;
-
- return 0;
-}
-
-/*
- * Initialization
- */
-
-static struct pmu min_pmu = {
- .name = "riscv-base",
- .event_init = riscv_event_init,
- .add = riscv_pmu_add,
- .del = riscv_pmu_del,
- .start = riscv_pmu_start,
- .stop = riscv_pmu_stop,
- .read = riscv_pmu_read,
-};
-
-static const struct riscv_pmu riscv_base_pmu = {
- .pmu = &min_pmu,
- .max_events = ARRAY_SIZE(riscv_hw_event_map),
- .map_hw_event = riscv_map_hw_event,
- .hw_events = riscv_hw_event_map,
- .map_cache_event = riscv_map_cache_event,
- .cache_events = &riscv_cache_event_map,
- .counter_width = 63,
- .num_counters = RISCV_BASE_COUNTERS + 0,
- .handle_irq = &riscv_base_pmu_handle_irq,
-
- /* This means this PMU has no IRQ. */
- .irq = -1,
-};
-
-static const struct of_device_id riscv_pmu_of_ids[] = {
- {.compatible = "riscv,base-pmu", .data = &riscv_base_pmu},
- { /* sentinel value */ }
-};
-
-static int __init init_hw_perf_events(void)
-{
- struct device_node *node = of_find_node_by_type(NULL, "pmu");
- const struct of_device_id *of_id;
-
- riscv_pmu = &riscv_base_pmu;
-
- if (node) {
- of_id = of_match_node(riscv_pmu_of_ids, node);
-
- if (of_id)
- riscv_pmu = of_id->data;
- of_node_put(node);
- }
-
- perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW);
- return 0;
-}
-arch_initcall(init_hw_perf_events);
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index c2d5ecbe5526..16da3c3b53a1 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -258,6 +258,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
}
}
+ rseq_signal_deliver(ksig, regs);
+
/* Set up the stack frame */
ret = setup_rt_frame(ksig, oldset, regs);
diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S
index 07d1d2152ba5..e0609e1f0864 100644
--- a/arch/riscv/lib/memmove.S
+++ b/arch/riscv/lib/memmove.S
@@ -1,64 +1,316 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Michael T. Kloos <michael@michaelkloos.com>
+ */
#include <linux/linkage.h>
#include <asm/asm.h>
-ENTRY(__memmove)
-WEAK(memmove)
- move t0, a0
- move t1, a1
-
- beq a0, a1, exit_memcpy
- beqz a2, exit_memcpy
- srli t2, a2, 0x2
-
- slt t3, a0, a1
- beqz t3, do_reverse
-
- andi a2, a2, 0x3
- li t4, 1
- beqz t2, byte_copy
-
-word_copy:
- lw t3, 0(a1)
- addi t2, t2, -1
- addi a1, a1, 4
- sw t3, 0(a0)
- addi a0, a0, 4
- bnez t2, word_copy
- beqz a2, exit_memcpy
- j byte_copy
-
-do_reverse:
- add a0, a0, a2
- add a1, a1, a2
- andi a2, a2, 0x3
- li t4, -1
- beqz t2, reverse_byte_copy
-
-reverse_word_copy:
- addi a1, a1, -4
- addi t2, t2, -1
- lw t3, 0(a1)
- addi a0, a0, -4
- sw t3, 0(a0)
- bnez t2, reverse_word_copy
- beqz a2, exit_memcpy
-
-reverse_byte_copy:
- addi a0, a0, -1
- addi a1, a1, -1
+SYM_FUNC_START(__memmove)
+SYM_FUNC_START_WEAK(memmove)
+ /*
+ * Returns
+ * a0 - dest
+ *
+ * Parameters
+ * a0 - Inclusive first byte of dest
+ * a1 - Inclusive first byte of src
+ * a2 - Length of copy n
+ *
+ * Because the return matches the parameter register a0,
+ * we will not clobber or modify that register.
+ *
+ * Note: This currently only works on little-endian.
+ * To port to big-endian, reverse the direction of shifts
+ * in the 2 misaligned fixup copy loops.
+ */
+ /* Return if nothing to do */
+ beq a0, a1, return_from_memmove
+ beqz a2, return_from_memmove
+
+ /*
+ * Register Uses
+ * Forward Copy: a1 - Index counter of src
+ * Reverse Copy: a4 - Index counter of src
+ * Forward Copy: t3 - Index counter of dest
+ * Reverse Copy: t4 - Index counter of dest
+ * Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest
+ * Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest
+ * Both Copy Modes: t0 - Link / Temporary for load-store
+ * Both Copy Modes: t1 - Temporary for load-store
+ * Both Copy Modes: t2 - Temporary for load-store
+ * Both Copy Modes: a5 - dest to src alignment offset
+ * Both Copy Modes: a6 - Shift ammount
+ * Both Copy Modes: a7 - Inverse Shift ammount
+ * Both Copy Modes: a2 - Alternate breakpoint for unrolled loops
+ */
+
+ /*
+ * Solve for some register values now.
+ * Byte copy does not need t5 or t6.
+ */
+ mv t3, a0
+ add t4, a0, a2
+ add a4, a1, a2
+
+ /*
+ * Byte copy if copying less than (2 * SZREG) bytes. This can
+ * cause problems with the bulk copy implementation and is
+ * small enough not to bother.
+ */
+ andi t0, a2, -(2 * SZREG)
+ beqz t0, byte_copy
+
+ /*
+ * Now solve for t5 and t6.
+ */
+ andi t5, t3, -SZREG
+ andi t6, t4, -SZREG
+ /*
+ * If dest(Register t3) rounded down to the nearest naturally
+ * aligned SZREG address, does not equal dest, then add SZREG
+ * to find the low-bound of SZREG alignment in the dest memory
+ * region. Note that this could overshoot the dest memory
+ * region if n is less than SZREG. This is one reason why
+ * we always byte copy if n is less than SZREG.
+ * Otherwise, dest is already naturally aligned to SZREG.
+ */
+ beq t5, t3, 1f
+ addi t5, t5, SZREG
+ 1:
+
+ /*
+ * If the dest and src are co-aligned to SZREG, then there is
+ * no need for the full rigmarole of a full misaligned fixup copy.
+ * Instead, do a simpler co-aligned copy.
+ */
+ xor t0, a0, a1
+ andi t1, t0, (SZREG - 1)
+ beqz t1, coaligned_copy
+ /* Fall through to misaligned fixup copy */
+
+misaligned_fixup_copy:
+ bltu a1, a0, misaligned_fixup_copy_reverse
+
+misaligned_fixup_copy_forward:
+ jal t0, byte_copy_until_aligned_forward
+
+ andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */
+ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
+ sub a5, a1, t3 /* Find the difference between src and dest */
+ andi a1, a1, -SZREG /* Align the src pointer */
+ addi a2, t6, SZREG /* The other breakpoint for the unrolled loop*/
+
+ /*
+ * Compute The Inverse Shift
+ * a7 = XLEN - a6 = XLEN + -a6
+ * 2s complement negation to find the negative: -a6 = ~a6 + 1
+ * Add that to XLEN. XLEN = SZREG * 8.
+ */
+ not a7, a6
+ addi a7, a7, (SZREG * 8 + 1)
+
+ /*
+ * Fix Misalignment Copy Loop - Forward
+ * load_val0 = load_ptr[0];
+ * do {
+ * load_val1 = load_ptr[1];
+ * store_ptr += 2;
+ * store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7});
+ *
+ * if (store_ptr == {a2})
+ * break;
+ *
+ * load_val0 = load_ptr[2];
+ * load_ptr += 2;
+ * store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7});
+ *
+ * } while (store_ptr != store_ptr_end);
+ * store_ptr = store_ptr_end;
+ */
+
+ REG_L t0, (0 * SZREG)(a1)
+ 1:
+ REG_L t1, (1 * SZREG)(a1)
+ addi t3, t3, (2 * SZREG)
+ srl t0, t0, a6
+ sll t2, t1, a7
+ or t2, t0, t2
+ REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3)
+
+ beq t3, a2, 2f
+
+ REG_L t0, (2 * SZREG)(a1)
+ addi a1, a1, (2 * SZREG)
+ srl t1, t1, a6
+ sll t2, t0, a7
+ or t2, t1, t2
+ REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3)
+
+ bne t3, t6, 1b
+ 2:
+ mv t3, t6 /* Fix the dest pointer in case the loop was broken */
+
+ add a1, t3, a5 /* Restore the src pointer */
+ j byte_copy_forward /* Copy any remaining bytes */
+
+misaligned_fixup_copy_reverse:
+ jal t0, byte_copy_until_aligned_reverse
+
+ andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */
+ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
+ sub a5, a4, t4 /* Find the difference between src and dest */
+ andi a4, a4, -SZREG /* Align the src pointer */
+ addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop*/
+
+ /*
+ * Compute The Inverse Shift
+ * a7 = XLEN - a6 = XLEN + -a6
+ * 2s complement negation to find the negative: -a6 = ~a6 + 1
+ * Add that to XLEN. XLEN = SZREG * 8.
+ */
+ not a7, a6
+ addi a7, a7, (SZREG * 8 + 1)
+
+ /*
+ * Fix Misalignment Copy Loop - Reverse
+ * load_val1 = load_ptr[0];
+ * do {
+ * load_val0 = load_ptr[-1];
+ * store_ptr -= 2;
+ * store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7});
+ *
+ * if (store_ptr == {a2})
+ * break;
+ *
+ * load_val1 = load_ptr[-2];
+ * load_ptr -= 2;
+ * store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7});
+ *
+ * } while (store_ptr != store_ptr_end);
+ * store_ptr = store_ptr_end;
+ */
+
+ REG_L t1, ( 0 * SZREG)(a4)
+ 1:
+ REG_L t0, (-1 * SZREG)(a4)
+ addi t4, t4, (-2 * SZREG)
+ sll t1, t1, a7
+ srl t2, t0, a6
+ or t2, t1, t2
+ REG_S t2, ( 1 * SZREG)(t4)
+
+ beq t4, a2, 2f
+
+ REG_L t1, (-2 * SZREG)(a4)
+ addi a4, a4, (-2 * SZREG)
+ sll t0, t0, a7
+ srl t2, t1, a6
+ or t2, t0, t2
+ REG_S t2, ( 0 * SZREG)(t4)
+
+ bne t4, t5, 1b
+ 2:
+ mv t4, t5 /* Fix the dest pointer in case the loop was broken */
+
+ add a4, t4, a5 /* Restore the src pointer */
+ j byte_copy_reverse /* Copy any remaining bytes */
+
+/*
+ * Simple copy loops for SZREG co-aligned memory locations.
+ * These also make calls to do byte copies for any unaligned
+ * data at their terminations.
+ */
+coaligned_copy:
+ bltu a1, a0, coaligned_copy_reverse
+
+coaligned_copy_forward:
+ jal t0, byte_copy_until_aligned_forward
+
+ 1:
+ REG_L t1, ( 0 * SZREG)(a1)
+ addi a1, a1, SZREG
+ addi t3, t3, SZREG
+ REG_S t1, (-1 * SZREG)(t3)
+ bne t3, t6, 1b
+
+ j byte_copy_forward /* Copy any remaining bytes */
+
+coaligned_copy_reverse:
+ jal t0, byte_copy_until_aligned_reverse
+
+ 1:
+ REG_L t1, (-1 * SZREG)(a4)
+ addi a4, a4, -SZREG
+ addi t4, t4, -SZREG
+ REG_S t1, ( 0 * SZREG)(t4)
+ bne t4, t5, 1b
+
+ j byte_copy_reverse /* Copy any remaining bytes */
+
+/*
+ * These are basically sub-functions within the function. They
+ * are used to byte copy until the dest pointer is in alignment.
+ * At which point, a bulk copy method can be used by the
+ * calling code. These work on the same registers as the bulk
+ * copy loops. Therefore, the register values can be picked
+ * up from where they were left and we avoid code duplication
+ * without any overhead except the call in and return jumps.
+ */
+byte_copy_until_aligned_forward:
+ beq t3, t5, 2f
+ 1:
+ lb t1, 0(a1)
+ addi a1, a1, 1
+ addi t3, t3, 1
+ sb t1, -1(t3)
+ bne t3, t5, 1b
+ 2:
+ jalr zero, 0x0(t0) /* Return to multibyte copy loop */
+
+byte_copy_until_aligned_reverse:
+ beq t4, t6, 2f
+ 1:
+ lb t1, -1(a4)
+ addi a4, a4, -1
+ addi t4, t4, -1
+ sb t1, 0(t4)
+ bne t4, t6, 1b
+ 2:
+ jalr zero, 0x0(t0) /* Return to multibyte copy loop */
+
+/*
+ * Simple byte copy loops.
+ * These will byte copy until they reach the end of data to copy.
+ * At that point, they will call to return from memmove.
+ */
byte_copy:
- lb t3, 0(a1)
- addi a2, a2, -1
- sb t3, 0(a0)
- add a1, a1, t4
- add a0, a0, t4
- bnez a2, byte_copy
-
-exit_memcpy:
- move a0, t0
- move a1, t1
- ret
-END(__memmove)
+ bltu a1, a0, byte_copy_reverse
+
+byte_copy_forward:
+ beq t3, t4, 2f
+ 1:
+ lb t1, 0(a1)
+ addi a1, a1, 1
+ addi t3, t3, 1
+ sb t1, -1(t3)
+ bne t3, t4, 1b
+ 2:
+ ret
+
+byte_copy_reverse:
+ beq t4, t3, 2f
+ 1:
+ lb t1, -1(a4)
+ addi a4, a4, -1
+ addi t4, t4, -1
+ sb t1, 0(t4)
+ bne t4, t3, 1b
+ 2:
+
+return_from_memmove:
+ ret
+
+SYM_FUNC_END(memmove)
+SYM_FUNC_END(__memmove)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index f6275b317129..9535bea8688c 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -38,14 +38,16 @@ EXPORT_SYMBOL(kernel_map);
#endif
#ifdef CONFIG_64BIT
-u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
+u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
#else
-u64 satp_mode = SATP_MODE_32;
+u64 satp_mode __ro_after_init = SATP_MODE_32;
#endif
EXPORT_SYMBOL(satp_mode);
bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
+bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
EXPORT_SYMBOL(pgtable_l4_enabled);
+EXPORT_SYMBOL(pgtable_l5_enabled);
phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
@@ -227,6 +229,7 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
@@ -318,6 +321,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd))
#endif /* CONFIG_XIP_KERNEL */
+static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
+static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
+static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
+#define fixmap_p4d ((p4d_t *)XIP_FIXUP(fixmap_p4d))
+#define early_p4d ((p4d_t *)XIP_FIXUP(early_p4d))
+#endif /* CONFIG_XIP_KERNEL */
+
static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
@@ -432,6 +445,44 @@ static phys_addr_t alloc_pud_late(uintptr_t va)
return __pa(vaddr);
}
+static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
+{
+ return (p4d_t *)((uintptr_t)pa);
+}
+
+static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_P4D);
+ return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
+}
+
+static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
+{
+ return (p4d_t *)__va(pa);
+}
+
+static phys_addr_t __init alloc_p4d_early(uintptr_t va)
+{
+ /* Only one P4D is available for early mapping */
+ BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+
+ return (uintptr_t)early_p4d;
+}
+
+static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_p4d_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr);
+ return __pa(vaddr);
+}
+
static void __init create_pud_mapping(pud_t *pudp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
@@ -459,21 +510,55 @@ static void __init create_pud_mapping(pud_t *pudp,
create_pmd_mapping(nextp, va, pa, sz, prot);
}
-#define pgd_next_t pud_t
-#define alloc_pgd_next(__va) (pgtable_l4_enabled ? \
- pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
-#define get_pgd_next_virt(__pa) (pgtable_l4_enabled ? \
- pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
+static void __init create_p4d_mapping(p4d_t *p4dp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pud_t *nextp;
+ phys_addr_t next_phys;
+ uintptr_t p4d_index = p4d_index(va);
+
+ if (sz == P4D_SIZE) {
+ if (p4d_val(p4dp[p4d_index]) == 0)
+ p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (p4d_val(p4dp[p4d_index]) == 0) {
+ next_phys = pt_ops.alloc_pud(va);
+ p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
+ nextp = pt_ops.get_pud_virt(next_phys);
+ memset(nextp, 0, PAGE_SIZE);
+ } else {
+ next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
+ nextp = pt_ops.get_pud_virt(next_phys);
+ }
+
+ create_pud_mapping(nextp, va, pa, sz, prot);
+}
+
+#define pgd_next_t p4d_t
+#define alloc_pgd_next(__va) (pgtable_l5_enabled ? \
+ pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ? \
+ pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
+#define get_pgd_next_virt(__pa) (pgtable_l5_enabled ? \
+ pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ? \
+ pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
+ (pgtable_l5_enabled ? \
+ create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
(pgtable_l4_enabled ? \
- create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \
- create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
-#define fixmap_pgd_next (pgtable_l4_enabled ? \
- (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
-#define trampoline_pgd_next (pgtable_l4_enabled ? \
- (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
-#define early_dtb_pgd_next (pgtable_l4_enabled ? \
- (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
+ create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) : \
+ create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
+#define fixmap_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)fixmap_p4d : (pgtable_l4_enabled ? \
+ (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
+#define trampoline_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \
+ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
+#define early_dtb_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \
+ (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
@@ -482,6 +567,7 @@ static void __init create_pud_mapping(pud_t *pudp,
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd)
+#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot)
#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
#endif /* __PAGETABLE_PMD_FOLDED */
@@ -575,6 +661,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
#endif /* CONFIG_STRICT_KERNEL_RWX */
#ifdef CONFIG_64BIT
+static void __init disable_pgtable_l5(void)
+{
+ pgtable_l5_enabled = false;
+ kernel_map.page_offset = PAGE_OFFSET_L4;
+ satp_mode = SATP_MODE_48;
+}
+
static void __init disable_pgtable_l4(void)
{
pgtable_l4_enabled = false;
@@ -591,12 +684,12 @@ static void __init disable_pgtable_l4(void)
static __init void set_satp_mode(void)
{
u64 identity_satp, hw_satp;
- uintptr_t set_satp_mode_pmd;
+ uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
+ bool check_l4 = false;
- set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
- create_pgd_mapping(early_pg_dir,
- set_satp_mode_pmd, (uintptr_t)early_pud,
- PGDIR_SIZE, PAGE_TABLE);
+ create_p4d_mapping(early_p4d,
+ set_satp_mode_pmd, (uintptr_t)early_pud,
+ P4D_SIZE, PAGE_TABLE);
create_pud_mapping(early_pud,
set_satp_mode_pmd, (uintptr_t)early_pmd,
PUD_SIZE, PAGE_TABLE);
@@ -608,6 +701,11 @@ static __init void set_satp_mode(void)
set_satp_mode_pmd + PMD_SIZE,
set_satp_mode_pmd + PMD_SIZE,
PMD_SIZE, PAGE_KERNEL_EXEC);
+retry:
+ create_pgd_mapping(early_pg_dir,
+ set_satp_mode_pmd,
+ check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
+ PGDIR_SIZE, PAGE_TABLE);
identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
@@ -616,10 +714,17 @@ static __init void set_satp_mode(void)
hw_satp = csr_swap(CSR_SATP, 0ULL);
local_flush_tlb_all();
- if (hw_satp != identity_satp)
+ if (hw_satp != identity_satp) {
+ if (!check_l4) {
+ disable_pgtable_l5();
+ check_l4 = true;
+ goto retry;
+ }
disable_pgtable_l4();
+ }
memset(early_pg_dir, 0, PAGE_SIZE);
+ memset(early_p4d, 0, PAGE_SIZE);
memset(early_pud, 0, PAGE_SIZE);
memset(early_pmd, 0, PAGE_SIZE);
}
@@ -693,10 +798,13 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
PGDIR_SIZE,
IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
- if (pgtable_l4_enabled) {
+ if (pgtable_l5_enabled)
+ create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
+ (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
+
+ if (pgtable_l4_enabled)
create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
(uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
- }
if (IS_ENABLED(CONFIG_64BIT)) {
create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
@@ -732,6 +840,8 @@ void __init pt_ops_set_early(void)
pt_ops.get_pmd_virt = get_pmd_virt_early;
pt_ops.alloc_pud = alloc_pud_early;
pt_ops.get_pud_virt = get_pud_virt_early;
+ pt_ops.alloc_p4d = alloc_p4d_early;
+ pt_ops.get_p4d_virt = get_p4d_virt_early;
#endif
}
@@ -752,6 +862,8 @@ void __init pt_ops_set_fixmap(void)
pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
+ pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap);
+ pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap);
#endif
}
@@ -768,6 +880,8 @@ void __init pt_ops_set_late(void)
pt_ops.get_pmd_virt = get_pmd_virt_late;
pt_ops.alloc_pud = alloc_pud_late;
pt_ops.get_pud_virt = get_pud_virt_late;
+ pt_ops.alloc_p4d = alloc_p4d_late;
+ pt_ops.get_p4d_virt = get_p4d_virt_late;
#endif
}
@@ -828,6 +942,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
#ifndef __PAGETABLE_PMD_FOLDED
+ /* Setup fixmap P4D and PUD */
+ if (pgtable_l5_enabled)
+ create_p4d_mapping(fixmap_p4d, FIXADDR_START,
+ (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
/* Setup fixmap PUD and PMD */
if (pgtable_l4_enabled)
create_pud_mapping(fixmap_pud, FIXADDR_START,
@@ -837,6 +955,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
/* Setup trampoline PGD and PMD */
create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ if (pgtable_l5_enabled)
+ create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
+ (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
if (pgtable_l4_enabled)
create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
(uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
@@ -938,6 +1059,7 @@ static void __init setup_vm_final(void)
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
clear_fixmap(FIX_PUD);
+ clear_fixmap(FIX_P4D);
/* Move to swapper page table */
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index cd1a145257b7..a22e418dbd82 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -111,6 +111,8 @@ static void __init kasan_populate_pud(pgd_t *pgd,
* pt_ops facility.
*/
base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
+ } else if (pgd_none(*pgd)) {
+ base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
} else {
base_pud = (pud_t *)pgd_page_vaddr(*pgd);
if (base_pud == lm_alias(kasan_early_shadow_pud)) {
@@ -152,13 +154,72 @@ static void __init kasan_populate_pud(pgd_t *pgd,
set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
}
-#define kasan_early_shadow_pgd_next (pgtable_l4_enabled ? \
+static void __init kasan_populate_p4d(pgd_t *pgd,
+ unsigned long vaddr, unsigned long end,
+ bool early)
+{
+ phys_addr_t phys_addr;
+ p4d_t *p4dp, *base_p4d;
+ unsigned long next;
+
+ if (early) {
+ /*
+ * We can't use pgd_page_vaddr here as it would return a linear
+ * mapping address but it is not mapped yet, but when populating
+ * early_pg_dir, we need the physical address and when populating
+ * swapper_pg_dir, we need the kernel virtual address so use
+ * pt_ops facility.
+ */
+ base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgd)));
+ } else {
+ base_p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+ if (base_p4d == lm_alias(kasan_early_shadow_p4d))
+ base_p4d = memblock_alloc(PTRS_PER_PUD * sizeof(p4d_t), PAGE_SIZE);
+ }
+
+ p4dp = base_p4d + p4d_index(vaddr);
+
+ do {
+ next = p4d_addr_end(vaddr, end);
+
+ if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) {
+ if (early) {
+ phys_addr = __pa(((uintptr_t)kasan_early_shadow_pud));
+ set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE));
+ continue;
+ } else {
+ phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE);
+ if (phys_addr) {
+ set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
+ }
+ }
+
+ kasan_populate_pud((pgd_t *)p4dp, vaddr, next, early);
+ } while (p4dp++, vaddr = next, vaddr != end);
+
+ /*
+ * Wait for the whole P4D to be populated before setting the P4D in
+ * the page table, otherwise, if we did set the P4D before populating
+ * it entirely, memblock could allocate a page at a physical address
+ * where KASAN is not populated yet and then we'd get a page fault.
+ */
+ if (!early)
+ set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_p4d)), PAGE_TABLE));
+}
+
+#define kasan_early_shadow_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)kasan_early_shadow_p4d : \
+ (pgtable_l4_enabled ? \
(uintptr_t)kasan_early_shadow_pud : \
- (uintptr_t)kasan_early_shadow_pmd)
+ (uintptr_t)kasan_early_shadow_pmd))
#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \
+ (pgtable_l5_enabled ? \
+ kasan_populate_p4d(pgdp, vaddr, next, early) : \
(pgtable_l4_enabled ? \
kasan_populate_pud(pgdp, vaddr, next, early) : \
- kasan_populate_pmd((pud_t *)pgdp, vaddr, next))
+ kasan_populate_pmd((pud_t *)pgdp, vaddr, next)))
static void __init kasan_populate_pgd(pgd_t *pgdp,
unsigned long vaddr, unsigned long end,
@@ -221,6 +282,14 @@ asmlinkage void __init kasan_early_init(void)
PAGE_TABLE));
}
+ if (pgtable_l5_enabled) {
+ for (i = 0; i < PTRS_PER_P4D; ++i)
+ set_p4d(kasan_early_shadow_p4d + i,
+ pfn_p4d(PFN_DOWN
+ (__pa(((uintptr_t)kasan_early_shadow_pud))),
+ PAGE_TABLE));
+ }
+
kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
KASAN_SHADOW_START, KASAN_SHADOW_END, true);
@@ -246,9 +315,27 @@ static void __init kasan_populate(void *start, void *end)
memset(start, KASAN_SHADOW_INIT, end - start);
}
+static void __init kasan_shallow_populate_pmd(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end)
+{
+ unsigned long next;
+ pmd_t *pmdp, *base_pmd;
+ bool is_kasan_pte;
+
+ base_pmd = (pmd_t *)pgd_page_vaddr(*pgdp);
+ pmdp = base_pmd + pmd_index(vaddr);
+
+ do {
+ next = pmd_addr_end(vaddr, end);
+ is_kasan_pte = (pmd_pgtable(*pmdp) == lm_alias(kasan_early_shadow_pte));
+
+ if (is_kasan_pte)
+ pmd_clear(pmdp);
+ } while (pmdp++, vaddr = next, vaddr != end);
+}
+
static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
- unsigned long vaddr, unsigned long end,
- bool kasan_populate)
+ unsigned long vaddr, unsigned long end)
{
unsigned long next;
pud_t *pudp, *base_pud;
@@ -258,21 +345,60 @@ static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
base_pud = (pud_t *)pgd_page_vaddr(*pgdp);
pudp = base_pud + pud_index(vaddr);
- if (kasan_populate)
- memcpy(base_pud, (void *)kasan_early_shadow_pgd_next,
- sizeof(pud_t) * PTRS_PER_PUD);
-
do {
next = pud_addr_end(vaddr, end);
is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd));
- if (is_kasan_pmd) {
- base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
- }
+ if (!is_kasan_pmd)
+ continue;
+
+ base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+
+ if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE)
+ continue;
+
+ memcpy(base_pmd, (void *)kasan_early_shadow_pmd, PAGE_SIZE);
+ kasan_shallow_populate_pmd((pgd_t *)pudp, vaddr, next);
} while (pudp++, vaddr = next, vaddr != end);
}
+static void __init kasan_shallow_populate_p4d(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end)
+{
+ unsigned long next;
+ p4d_t *p4dp, *base_p4d;
+ pud_t *base_pud;
+ bool is_kasan_pud;
+
+ base_p4d = (p4d_t *)pgd_page_vaddr(*pgdp);
+ p4dp = base_p4d + p4d_index(vaddr);
+
+ do {
+ next = p4d_addr_end(vaddr, end);
+ is_kasan_pud = (p4d_pgtable(*p4dp) == lm_alias(kasan_early_shadow_pud));
+
+ if (!is_kasan_pud)
+ continue;
+
+ base_pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_p4d(p4dp, pfn_p4d(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
+
+ if (IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE)
+ continue;
+
+ memcpy(base_pud, (void *)kasan_early_shadow_pud, PAGE_SIZE);
+ kasan_shallow_populate_pud((pgd_t *)p4dp, vaddr, next);
+ } while (p4dp++, vaddr = next, vaddr != end);
+}
+
+#define kasan_shallow_populate_pgd_next(pgdp, vaddr, next) \
+ (pgtable_l5_enabled ? \
+ kasan_shallow_populate_p4d(pgdp, vaddr, next) : \
+ (pgtable_l4_enabled ? \
+ kasan_shallow_populate_pud(pgdp, vaddr, next) : \
+ kasan_shallow_populate_pmd(pgdp, vaddr, next)))
+
static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
{
unsigned long next;
@@ -293,7 +419,8 @@ static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long
if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE)
continue;
- kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next);
+ memcpy(p, (void *)kasan_early_shadow_pgd_next, PAGE_SIZE);
+ kasan_shallow_populate_pgd_next(pgd_k, vaddr, next);
} while (pgd_k++, vaddr = next, vaddr != end);
}
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index d05ca6ebbb9d..afdcb91601d2 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -56,6 +56,36 @@ config ARM_PMU
Say y if you want to use CPU performance monitors on ARM-based
systems.
+config RISCV_PMU
+ depends on RISCV
+ bool "RISC-V PMU framework"
+ default y
+ help
+ Say y if you want to use CPU performance monitors on RISCV-based
+ systems. This provides the core PMU framework that abstracts common
+ PMU functionalities in a core library so that different PMU drivers
+ can reuse it.
+
+config RISCV_PMU_LEGACY
+ depends on RISCV_PMU
+ bool "RISC-V legacy PMU implementation"
+ default y
+ help
+ Say y if you want to use the legacy CPU performance monitor
+ implementation on RISC-V based systems. This only allows counting
+ of cycle/instruction counter and doesn't support counter overflow,
+ or programmable counters. It will be removed in future.
+
+config RISCV_PMU_SBI
+ depends on RISCV_PMU && RISCV_SBI
+ bool "RISC-V PMU based on SBI PMU extension"
+ default y
+ help
+ Say y if you want to use the CPU performance monitor
+ using SBI PMU extension on RISC-V based systems. This option provides
+ full perf feature support i.e. counter overflow, privilege mode
+ filtering, counter configuration.
+
config ARM_PMU_ACPI
depends on ARM_PMU && ACPI
def_bool y
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 4f43080ec54e..57a279c61df5 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -10,6 +10,9 @@ obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o
+obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o
+obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o
obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
new file mode 100644
index 000000000000..b2b8d2074ed0
--- /dev/null
+++ b/drivers/perf/riscv_pmu.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V performance counter support.
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This implementation is based on old RISC-V perf and ARM perf event code
+ * which are in turn based on sparc64 and x86 code.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/perf/riscv_pmu.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+
+#include <asm/sbi.h>
+
+static unsigned long csr_read_num(int csr_num)
+{
+#define switchcase_csr_read(__csr_num, __val) {\
+ case __csr_num: \
+ __val = csr_read(__csr_num); \
+ break; }
+#define switchcase_csr_read_2(__csr_num, __val) {\
+ switchcase_csr_read(__csr_num + 0, __val) \
+ switchcase_csr_read(__csr_num + 1, __val)}
+#define switchcase_csr_read_4(__csr_num, __val) {\
+ switchcase_csr_read_2(__csr_num + 0, __val) \
+ switchcase_csr_read_2(__csr_num + 2, __val)}
+#define switchcase_csr_read_8(__csr_num, __val) {\
+ switchcase_csr_read_4(__csr_num + 0, __val) \
+ switchcase_csr_read_4(__csr_num + 4, __val)}
+#define switchcase_csr_read_16(__csr_num, __val) {\
+ switchcase_csr_read_8(__csr_num + 0, __val) \
+ switchcase_csr_read_8(__csr_num + 8, __val)}
+#define switchcase_csr_read_32(__csr_num, __val) {\
+ switchcase_csr_read_16(__csr_num + 0, __val) \
+ switchcase_csr_read_16(__csr_num + 16, __val)}
+
+ unsigned long ret = 0;
+
+ switch (csr_num) {
+ switchcase_csr_read_32(CSR_CYCLE, ret)
+ switchcase_csr_read_32(CSR_CYCLEH, ret)
+ default :
+ break;
+ }
+
+ return ret;
+#undef switchcase_csr_read_32
+#undef switchcase_csr_read_16
+#undef switchcase_csr_read_8
+#undef switchcase_csr_read_4
+#undef switchcase_csr_read_2
+#undef switchcase_csr_read
+}
+
+/*
+ * Read the CSR of a corresponding counter.
+ */
+unsigned long riscv_pmu_ctr_read_csr(unsigned long csr)
+{
+ if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H ||
+ (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) {
+ pr_err("Invalid performance counter csr %lx\n", csr);
+ return -EINVAL;
+ }
+
+ return csr_read_num(csr);
+}
+
+u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
+{
+ int cwidth;
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!rvpmu->ctr_get_width)
+ /**
+ * If the pmu driver doesn't support counter width, set it to default
+ * maximum allowed by the specification.
+ */
+ cwidth = 63;
+ else {
+ if (hwc->idx == -1)
+ /* Handle init case where idx is not initialized yet */
+ cwidth = rvpmu->ctr_get_width(0);
+ else
+ cwidth = rvpmu->ctr_get_width(hwc->idx);
+ }
+
+ return GENMASK_ULL(cwidth, 0);
+}
+
+u64 riscv_pmu_event_update(struct perf_event *event)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_raw_count, new_raw_count;
+ unsigned long cmask;
+ u64 oldval, delta;
+
+ if (!rvpmu->ctr_read)
+ return 0;
+
+ cmask = riscv_pmu_ctr_get_width_mask(event);
+
+ do {
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = rvpmu->ctr_read(event);
+ oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count);
+ } while (oldval != prev_raw_count);
+
+ delta = (new_raw_count - prev_raw_count) & cmask;
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+
+ return delta;
+}
+
+static void riscv_pmu_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ if (rvpmu->ctr_stop) {
+ rvpmu->ctr_stop(event, 0);
+ hwc->state |= PERF_HES_STOPPED;
+ }
+ riscv_pmu_event_update(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+int riscv_pmu_event_set_period(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ s64 left = local64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int overflow = 0;
+ uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
+
+ if (unlikely(left <= -period)) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ if (unlikely(left <= 0)) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ /*
+ * Limit the maximum period to prevent the counter value
+ * from overtaking the one we are about to program. In
+ * effect we are reducing max_period to account for
+ * interrupt latency (and we are being very conservative).
+ */
+ if (left > (max_period >> 1))
+ left = (max_period >> 1);
+
+ local64_set(&hwc->prev_count, (u64)-left);
+ perf_event_update_userpage(event);
+
+ return overflow;
+}
+
+static void riscv_pmu_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+ uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
+ u64 init_val;
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+ riscv_pmu_event_set_period(event);
+ init_val = local64_read(&hwc->prev_count) & max_period;
+ rvpmu->ctr_start(event, init_val);
+ perf_event_update_userpage(event);
+}
+
+static int riscv_pmu_add(struct perf_event *event, int flags)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+
+ idx = rvpmu->ctr_get_idx(event);
+ if (idx < 0)
+ return idx;
+
+ hwc->idx = idx;
+ cpuc->events[idx] = event;
+ cpuc->n_events++;
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ if (flags & PERF_EF_START)
+ riscv_pmu_start(event, PERF_EF_RELOAD);
+
+ /* Propagate our changes to the userspace mapping. */
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static void riscv_pmu_del(struct perf_event *event, int flags)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ riscv_pmu_stop(event, PERF_EF_UPDATE);
+ cpuc->events[hwc->idx] = NULL;
+ /* The firmware need to reset the counter mapping */
+ if (rvpmu->ctr_stop)
+ rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET);
+ cpuc->n_events--;
+ if (rvpmu->ctr_clear_idx)
+ rvpmu->ctr_clear_idx(event);
+ perf_event_update_userpage(event);
+ hwc->idx = -1;
+}
+
+static void riscv_pmu_read(struct perf_event *event)
+{
+ riscv_pmu_event_update(event);
+}
+
+static int riscv_pmu_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+ int mapped_event;
+ u64 event_config = 0;
+ uint64_t cmask;
+
+ hwc->flags = 0;
+ mapped_event = rvpmu->event_map(event, &event_config);
+ if (mapped_event < 0) {
+ pr_debug("event %x:%llx not supported\n", event->attr.type,
+ event->attr.config);
+ return mapped_event;
+ }
+
+ /*
+ * idx is set to -1 because the index of a general event should not be
+ * decided until binding to some counter in pmu->add().
+ * config will contain the information about counter CSR
+ * the idx will contain the counter index
+ */
+ hwc->config = event_config;
+ hwc->idx = -1;
+ hwc->event_base = mapped_event;
+
+ if (!is_sampling_event(event)) {
+ /*
+ * For non-sampling runs, limit the sample_period to half
+ * of the counter width. That way, the new counter value
+ * is far less likely to overtake the previous one unless
+ * you have some serious IRQ latency issues.
+ */
+ cmask = riscv_pmu_ctr_get_width_mask(event);
+ hwc->sample_period = cmask >> 1;
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+ }
+
+ return 0;
+}
+
+struct riscv_pmu *riscv_pmu_alloc(void)
+{
+ struct riscv_pmu *pmu;
+ int cpuid, i;
+ struct cpu_hw_events *cpuc;
+
+ pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+ if (!pmu)
+ goto out;
+
+ pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL);
+ if (!pmu->hw_events) {
+ pr_info("failed to allocate per-cpu PMU data.\n");
+ goto out_free_pmu;
+ }
+
+ for_each_possible_cpu(cpuid) {
+ cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
+ cpuc->n_events = 0;
+ for (i = 0; i < RISCV_MAX_COUNTERS; i++)
+ cpuc->events[i] = NULL;
+ }
+ pmu->pmu = (struct pmu) {
+ .event_init = riscv_pmu_event_init,
+ .add = riscv_pmu_add,
+ .del = riscv_pmu_del,
+ .start = riscv_pmu_start,
+ .stop = riscv_pmu_stop,
+ .read = riscv_pmu_read,
+ };
+
+ return pmu;
+
+out_free_pmu:
+ kfree(pmu);
+out:
+ return NULL;
+}
diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c
new file mode 100644
index 000000000000..342778782359
--- /dev/null
+++ b/drivers/perf/riscv_pmu_legacy.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V performance counter support.
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This implementation is based on old RISC-V perf and ARM perf event code
+ * which are in turn based on sparc64 and x86 code.
+ */
+
+#include <linux/mod_devicetable.h>
+#include <linux/perf/riscv_pmu.h>
+#include <linux/platform_device.h>
+
+#define RISCV_PMU_LEGACY_CYCLE 0
+#define RISCV_PMU_LEGACY_INSTRET 1
+#define RISCV_PMU_LEGACY_NUM_CTR 2
+
+static bool pmu_init_done;
+
+static int pmu_legacy_ctr_get_idx(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+
+ if (event->attr.type != PERF_TYPE_HARDWARE)
+ return -EOPNOTSUPP;
+ if (attr->config == PERF_COUNT_HW_CPU_CYCLES)
+ return RISCV_PMU_LEGACY_CYCLE;
+ else if (attr->config == PERF_COUNT_HW_INSTRUCTIONS)
+ return RISCV_PMU_LEGACY_INSTRET;
+ else
+ return -EOPNOTSUPP;
+}
+
+/* For legacy config & counter index are same */
+static int pmu_legacy_event_map(struct perf_event *event, u64 *config)
+{
+ return pmu_legacy_ctr_get_idx(event);
+}
+
+static u64 pmu_legacy_read_ctr(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u64 val;
+
+ if (idx == RISCV_PMU_LEGACY_CYCLE) {
+ val = riscv_pmu_ctr_read_csr(CSR_CYCLE);
+ if (IS_ENABLED(CONFIG_32BIT))
+ val = (u64)riscv_pmu_ctr_read_csr(CSR_CYCLEH) << 32 | val;
+ } else if (idx == RISCV_PMU_LEGACY_INSTRET) {
+ val = riscv_pmu_ctr_read_csr(CSR_INSTRET);
+ if (IS_ENABLED(CONFIG_32BIT))
+ val = ((u64)riscv_pmu_ctr_read_csr(CSR_INSTRETH)) << 32 | val;
+ } else
+ return 0;
+
+ return val;
+}
+
+static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 initial_val = pmu_legacy_read_ctr(event);
+
+ /**
+ * The legacy method doesn't really have a start/stop method.
+ * It also can not update the counter with a initial value.
+ * But we still need to set the prev_count so that read() can compute
+ * the delta. Just use the current counter value to set the prev_count.
+ */
+ local64_set(&hwc->prev_count, initial_val);
+}
+
+/**
+ * This is just a simple implementation to allow legacy implementations
+ * compatible with new RISC-V PMU driver framework.
+ * This driver only allows reading two counters i.e CYCLE & INSTRET.
+ * However, it can not start or stop the counter. Thus, it is not very useful
+ * will be removed in future.
+ */
+static void pmu_legacy_init(struct riscv_pmu *pmu)
+{
+ pr_info("Legacy PMU implementation is available\n");
+
+ pmu->num_counters = RISCV_PMU_LEGACY_NUM_CTR;
+ pmu->ctr_start = pmu_legacy_ctr_start;
+ pmu->ctr_stop = NULL;
+ pmu->event_map = pmu_legacy_event_map;
+ pmu->ctr_get_idx = pmu_legacy_ctr_get_idx;
+ pmu->ctr_get_width = NULL;
+ pmu->ctr_clear_idx = NULL;
+ pmu->ctr_read = pmu_legacy_read_ctr;
+
+ perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
+}
+
+static int pmu_legacy_device_probe(struct platform_device *pdev)
+{
+ struct riscv_pmu *pmu = NULL;
+
+ pmu = riscv_pmu_alloc();
+ if (!pmu)
+ return -ENOMEM;
+ pmu_legacy_init(pmu);
+
+ return 0;
+}
+
+static struct platform_driver pmu_legacy_driver = {
+ .probe = pmu_legacy_device_probe,
+ .driver = {
+ .name = RISCV_PMU_LEGACY_PDEV_NAME,
+ },
+};
+
+static int __init riscv_pmu_legacy_devinit(void)
+{
+ int ret;
+ struct platform_device *pdev;
+
+ if (likely(pmu_init_done))
+ return 0;
+
+ ret = platform_driver_register(&pmu_legacy_driver);
+ if (ret)
+ return ret;
+
+ pdev = platform_device_register_simple(RISCV_PMU_LEGACY_PDEV_NAME, -1, NULL, 0);
+ if (IS_ERR(pdev)) {
+ platform_driver_unregister(&pmu_legacy_driver);
+ return PTR_ERR(pdev);
+ }
+
+ return ret;
+}
+late_initcall(riscv_pmu_legacy_devinit);
+
+void riscv_pmu_legacy_skip_init(void)
+{
+ pmu_init_done = true;
+}
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
new file mode 100644
index 000000000000..a1317a483512
--- /dev/null
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -0,0 +1,790 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V performance counter support.
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This code is based on ARM perf event code which is in turn based on
+ * sparc64 and x86 code.
+ */
+
+#define pr_fmt(fmt) "riscv-pmu-sbi: " fmt
+
+#include <linux/mod_devicetable.h>
+#include <linux/perf/riscv_pmu.h>
+#include <linux/platform_device.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_irq.h>
+#include <linux/of.h>
+
+#include <asm/sbi.h>
+#include <asm/hwcap.h>
+
+union sbi_pmu_ctr_info {
+ unsigned long value;
+ struct {
+ unsigned long csr:12;
+ unsigned long width:6;
+#if __riscv_xlen == 32
+ unsigned long reserved:13;
+#else
+ unsigned long reserved:45;
+#endif
+ unsigned long type:1;
+ };
+};
+
+/**
+ * RISC-V doesn't have hetergenous harts yet. This need to be part of
+ * per_cpu in case of harts with different pmu counters
+ */
+static union sbi_pmu_ctr_info *pmu_ctr_list;
+static unsigned int riscv_pmu_irq;
+
+struct sbi_pmu_event_data {
+ union {
+ union {
+ struct hw_gen_event {
+ uint32_t event_code:16;
+ uint32_t event_type:4;
+ uint32_t reserved:12;
+ } hw_gen_event;
+ struct hw_cache_event {
+ uint32_t result_id:1;
+ uint32_t op_id:2;
+ uint32_t cache_id:13;
+ uint32_t event_type:4;
+ uint32_t reserved:12;
+ } hw_cache_event;
+ };
+ uint32_t event_idx;
+ };
+};
+
+static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = {
+ SBI_PMU_HW_CPU_CYCLES,
+ SBI_PMU_EVENT_TYPE_HW, 0}},
+ [PERF_COUNT_HW_INSTRUCTIONS] = {.hw_gen_event = {
+ SBI_PMU_HW_INSTRUCTIONS,
+ SBI_PMU_EVENT_TYPE_HW, 0}},
+ [PERF_COUNT_HW_CACHE_REFERENCES] = {.hw_gen_event = {
+ SBI_PMU_HW_CACHE_REFERENCES,
+ SBI_PMU_EVENT_TYPE_HW, 0}},
+ [PERF_COUNT_HW_CACHE_MISSES] = {.hw_gen_event = {
+ SBI_PMU_HW_CACHE_MISSES,
+ SBI_PMU_EVENT_TYPE_HW, 0}},
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {.hw_gen_event = {
+ SBI_PMU_HW_BRANCH_INSTRUCTIONS,
+ SBI_PMU_EVENT_TYPE_HW, 0}},
+ [PERF_COUNT_HW_BRANCH_MISSES] = {.hw_gen_event = {
+ SBI_PMU_HW_BRANCH_MISSES,
+ SBI_PMU_EVENT_TYPE_HW, 0}},
+ [PERF_COUNT_HW_BUS_CYCLES] = {.hw_gen_event = {
+ SBI_PMU_HW_BUS_CYCLES,
+ SBI_PMU_EVENT_TYPE_HW, 0}},
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {.hw_gen_event = {
+ SBI_PMU_HW_STALLED_CYCLES_FRONTEND,
+ SBI_PMU_EVENT_TYPE_HW, 0}},
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = {.hw_gen_event = {
+ SBI_PMU_HW_STALLED_CYCLES_BACKEND,
+ SBI_PMU_EVENT_TYPE_HW, 0}},
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = {.hw_gen_event = {
+ SBI_PMU_HW_REF_CPU_CYCLES,
+ SBI_PMU_EVENT_TYPE_HW, 0}},
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+[PERF_COUNT_HW_CACHE_OP_MAX]
+[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), C(OP_READ),
+ C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ },
+ [C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+ C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+ C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+ },
+ },
+};
+
+static int pmu_sbi_ctr_get_width(int idx)
+{
+ return pmu_ctr_list[idx].width;
+}
+
+static bool pmu_sbi_ctr_is_fw(int cidx)
+{
+ union sbi_pmu_ctr_info *info;
+
+ info = &pmu_ctr_list[cidx];
+ if (!info)
+ return false;
+
+ return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false;
+}
+
+static int pmu_sbi_ctr_get_idx(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+ struct sbiret ret;
+ int idx;
+ uint64_t cbase = 0;
+ uint64_t cmask = GENMASK_ULL(rvpmu->num_counters - 1, 0);
+ unsigned long cflags = 0;
+
+ if (event->attr.exclude_kernel)
+ cflags |= SBI_PMU_CFG_FLAG_SET_SINH;
+ if (event->attr.exclude_user)
+ cflags |= SBI_PMU_CFG_FLAG_SET_UINH;
+
+ /* retrieve the available counter index */
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask,
+ cflags, hwc->event_base, hwc->config, 0);
+ if (ret.error) {
+ pr_debug("Not able to find a counter for event %lx config %llx\n",
+ hwc->event_base, hwc->config);
+ return sbi_err_map_linux_errno(ret.error);
+ }
+
+ idx = ret.value;
+ if (idx >= rvpmu->num_counters || !pmu_ctr_list[idx].value)
+ return -ENOENT;
+
+ /* Additional sanity check for the counter id */
+ if (pmu_sbi_ctr_is_fw(idx)) {
+ if (!test_and_set_bit(idx, cpuc->used_fw_ctrs))
+ return idx;
+ } else {
+ if (!test_and_set_bit(idx, cpuc->used_hw_ctrs))
+ return idx;
+ }
+
+ return -ENOENT;
+}
+
+static void pmu_sbi_ctr_clear_idx(struct perf_event *event)
+{
+
+ struct hw_perf_event *hwc = &event->hw;
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+ int idx = hwc->idx;
+
+ if (pmu_sbi_ctr_is_fw(idx))
+ clear_bit(idx, cpuc->used_fw_ctrs);
+ else
+ clear_bit(idx, cpuc->used_hw_ctrs);
+}
+
+static int pmu_event_find_cache(u64 config)
+{
+ unsigned int cache_type, cache_op, cache_result, ret;
+
+ cache_type = (config >> 0) & 0xff;
+ if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+ return -EINVAL;
+
+ cache_op = (config >> 8) & 0xff;
+ if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+ return -EINVAL;
+
+ cache_result = (config >> 16) & 0xff;
+ if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+ return -EINVAL;
+
+ ret = pmu_cache_event_map[cache_type][cache_op][cache_result].event_idx;
+
+ return ret;
+}
+
+static bool pmu_sbi_is_fw_event(struct perf_event *event)
+{
+ u32 type = event->attr.type;
+ u64 config = event->attr.config;
+
+ if ((type == PERF_TYPE_RAW) && ((config >> 63) == 1))
+ return true;
+ else
+ return false;
+}
+
+static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
+{
+ u32 type = event->attr.type;
+ u64 config = event->attr.config;
+ int bSoftware;
+ u64 raw_config_val;
+ int ret;
+
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ if (config >= PERF_COUNT_HW_MAX)
+ return -EINVAL;
+ ret = pmu_hw_event_map[event->attr.config].event_idx;
+ break;
+ case PERF_TYPE_HW_CACHE:
+ ret = pmu_event_find_cache(config);
+ break;
+ case PERF_TYPE_RAW:
+ /*
+ * As per SBI specification, the upper 16 bits must be unused for
+ * a raw event. Use the MSB (63b) to distinguish between hardware
+ * raw event and firmware events.
+ */
+ bSoftware = config >> 63;
+ raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK;
+ if (bSoftware) {
+ if (raw_config_val < SBI_PMU_FW_MAX)
+ ret = (raw_config_val & 0xFFFF) |
+ (SBI_PMU_EVENT_TYPE_FW << 16);
+ else
+ return -EINVAL;
+ } else {
+ ret = RISCV_PMU_RAW_EVENT_IDX;
+ *econfig = raw_config_val;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static u64 pmu_sbi_ctr_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ struct sbiret ret;
+ union sbi_pmu_ctr_info info;
+ u64 val = 0;
+
+ if (pmu_sbi_is_fw_event(event)) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
+ hwc->idx, 0, 0, 0, 0, 0);
+ if (!ret.error)
+ val = ret.value;
+ } else {
+ info = pmu_ctr_list[idx];
+ val = riscv_pmu_ctr_read_csr(info.csr);
+ if (IS_ENABLED(CONFIG_32BIT))
+ val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val;
+ }
+
+ return val;
+}
+
+static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
+{
+ struct sbiret ret;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
+ 1, flag, ival, ival >> 32, 0);
+ if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
+ pr_err("Starting counter idx %d failed with error %d\n",
+ hwc->idx, sbi_err_map_linux_errno(ret.error));
+}
+
+static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
+{
+ struct sbiret ret;
+ struct hw_perf_event *hwc = &event->hw;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
+ if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
+ flag != SBI_PMU_STOP_FLAG_RESET)
+ pr_err("Stopping counter idx %d failed with error %d\n",
+ hwc->idx, sbi_err_map_linux_errno(ret.error));
+}
+
+static int pmu_sbi_find_num_ctrs(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
+ if (!ret.error)
+ return ret.value;
+ else
+ return sbi_err_map_linux_errno(ret.error);
+}
+
+static int pmu_sbi_get_ctrinfo(int nctr)
+{
+ struct sbiret ret;
+ int i, num_hw_ctr = 0, num_fw_ctr = 0;
+ union sbi_pmu_ctr_info cinfo;
+
+ pmu_ctr_list = kcalloc(nctr, sizeof(*pmu_ctr_list), GFP_KERNEL);
+ if (!pmu_ctr_list)
+ return -ENOMEM;
+
+ for (i = 0; i <= nctr; i++) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
+ if (ret.error)
+ /* The logical counter ids are not expected to be contiguous */
+ continue;
+ cinfo.value = ret.value;
+ if (cinfo.type == SBI_PMU_CTR_TYPE_FW)
+ num_fw_ctr++;
+ else
+ num_hw_ctr++;
+ pmu_ctr_list[i].value = cinfo.value;
+ }
+
+ pr_info("%d firmware and %d hardware counters\n", num_fw_ctr, num_hw_ctr);
+
+ return 0;
+}
+
+static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
+{
+ /**
+ * No need to check the error because we are disabling all the counters
+ * which may include counters that are not enabled yet.
+ */
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
+ 0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0);
+}
+
+static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
+{
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+
+ /* No need to check the error here as we can't do anything about the error */
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
+ cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
+}
+
+/**
+ * This function starts all the used counters in two step approach.
+ * Any counter that did not overflow can be start in a single step
+ * while the overflowed counters need to be started with updated initialization
+ * value.
+ */
+static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
+ unsigned long ctr_ovf_mask)
+{
+ int idx = 0;
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ struct perf_event *event;
+ unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
+ unsigned long ctr_start_mask = 0;
+ uint64_t max_period;
+ struct hw_perf_event *hwc;
+ u64 init_val = 0;
+
+ ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;
+
+ /* Start all the counters that did not overflow in a single shot */
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
+ 0, 0, 0, 0);
+
+ /* Reinitialize and start all the counter that overflowed */
+ while (ctr_ovf_mask) {
+ if (ctr_ovf_mask & 0x01) {
+ event = cpu_hw_evt->events[idx];
+ hwc = &event->hw;
+ max_period = riscv_pmu_ctr_get_width_mask(event);
+ init_val = local64_read(&hwc->prev_count) & max_period;
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
+ flag, init_val, 0, 0);
+ }
+ ctr_ovf_mask = ctr_ovf_mask >> 1;
+ idx++;
+ }
+}
+
+static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
+{
+ struct perf_sample_data data;
+ struct pt_regs *regs;
+ struct hw_perf_event *hw_evt;
+ union sbi_pmu_ctr_info *info;
+ int lidx, hidx, fidx;
+ struct riscv_pmu *pmu;
+ struct perf_event *event;
+ unsigned long overflow;
+ unsigned long overflowed_ctrs = 0;
+ struct cpu_hw_events *cpu_hw_evt = dev;
+
+ if (WARN_ON_ONCE(!cpu_hw_evt))
+ return IRQ_NONE;
+
+ /* Firmware counter don't support overflow yet */
+ fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
+ event = cpu_hw_evt->events[fidx];
+ if (!event) {
+ csr_clear(CSR_SIP, SIP_LCOFIP);
+ return IRQ_NONE;
+ }
+
+ pmu = to_riscv_pmu(event->pmu);
+ pmu_sbi_stop_hw_ctrs(pmu);
+
+ /* Overflow status register should only be read after counter are stopped */
+ overflow = csr_read(CSR_SSCOUNTOVF);
+
+ /**
+ * Overflow interrupt pending bit should only be cleared after stopping
+ * all the counters to avoid any race condition.
+ */
+ csr_clear(CSR_SIP, SIP_LCOFIP);
+
+ /* No overflow bit is set */
+ if (!overflow)
+ return IRQ_NONE;
+
+ regs = get_irq_regs();
+
+ for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
+ struct perf_event *event = cpu_hw_evt->events[lidx];
+
+ /* Skip if invalid event or user did not request a sampling */
+ if (!event || !is_sampling_event(event))
+ continue;
+
+ info = &pmu_ctr_list[lidx];
+ /* Do a sanity check */
+ if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
+ continue;
+
+ /* compute hardware counter index */
+ hidx = info->csr - CSR_CYCLE;
+ /* check if the corresponding bit is set in sscountovf */
+ if (!(overflow & (1 << hidx)))
+ continue;
+
+ /*
+ * Keep a track of overflowed counters so that they can be started
+ * with updated initial value.
+ */
+ overflowed_ctrs |= 1 << lidx;
+ hw_evt = &event->hw;
+ riscv_pmu_event_update(event);
+ perf_sample_data_init(&data, 0, hw_evt->last_period);
+ if (riscv_pmu_event_set_period(event)) {
+ /*
+ * Unlike other ISAs, RISC-V don't have to disable interrupts
+ * to avoid throttling here. As per the specification, the
+ * interrupt remains disabled until the OF bit is set.
+ * Interrupts are enabled again only during the start.
+ * TODO: We will need to stop the guest counters once
+ * virtualization support is added.
+ */
+ perf_event_overflow(event, &data, regs);
+ }
+ }
+ pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
+
+ return IRQ_HANDLED;
+}
+
+static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+
+ /* Enable the access for TIME csr only from the user mode now */
+ csr_write(CSR_SCOUNTEREN, 0x2);
+
+ /* Stop all the counters so that they can be enabled from perf */
+ pmu_sbi_stop_all(pmu);
+
+ if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+ cpu_hw_evt->irq = riscv_pmu_irq;
+ csr_clear(CSR_IP, BIT(RV_IRQ_PMU));
+ csr_set(CSR_IE, BIT(RV_IRQ_PMU));
+ enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
+ }
+
+ return 0;
+}
+
+static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+ disable_percpu_irq(riscv_pmu_irq);
+ csr_clear(CSR_IE, BIT(RV_IRQ_PMU));
+ }
+
+ /* Disable all counters access for user mode now */
+ csr_write(CSR_SCOUNTEREN, 0x0);
+
+ return 0;
+}
+
+static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev)
+{
+ int ret;
+ struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
+ struct device_node *cpu, *child;
+ struct irq_domain *domain = NULL;
+
+ if (!riscv_isa_extension_available(NULL, SSCOFPMF))
+ return -EOPNOTSUPP;
+
+ for_each_of_cpu_node(cpu) {
+ child = of_get_compatible_child(cpu, "riscv,cpu-intc");
+ if (!child) {
+ pr_err("Failed to find INTC node\n");
+ return -ENODEV;
+ }
+ domain = irq_find_host(child);
+ of_node_put(child);
+ if (domain)
+ break;
+ }
+ if (!domain) {
+ pr_err("Failed to find INTC IRQ root domain\n");
+ return -ENODEV;
+ }
+
+ riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU);
+ if (!riscv_pmu_irq) {
+ pr_err("Failed to map PMU interrupt for node\n");
+ return -ENODEV;
+ }
+
+ ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
+ if (ret) {
+ pr_err("registering percpu irq failed [%d]\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int pmu_sbi_device_probe(struct platform_device *pdev)
+{
+ struct riscv_pmu *pmu = NULL;
+ int num_counters;
+ int ret = -ENODEV;
+
+ pr_info("SBI PMU extension is available\n");
+ pmu = riscv_pmu_alloc();
+ if (!pmu)
+ return -ENOMEM;
+
+ num_counters = pmu_sbi_find_num_ctrs();
+ if (num_counters < 0) {
+ pr_err("SBI PMU extension doesn't provide any counters\n");
+ goto out_free;
+ }
+
+ /* cache all the information about counters now */
+ if (pmu_sbi_get_ctrinfo(num_counters))
+ goto out_free;
+
+ ret = pmu_sbi_setup_irqs(pmu, pdev);
+ if (ret < 0) {
+ pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n");
+ pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+ pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
+ }
+ pmu->num_counters = num_counters;
+ pmu->ctr_start = pmu_sbi_ctr_start;
+ pmu->ctr_stop = pmu_sbi_ctr_stop;
+ pmu->event_map = pmu_sbi_event_map;
+ pmu->ctr_get_idx = pmu_sbi_ctr_get_idx;
+ pmu->ctr_get_width = pmu_sbi_ctr_get_width;
+ pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
+ pmu->ctr_read = pmu_sbi_ctr_read;
+
+ ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
+ if (ret)
+ return ret;
+
+ ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
+ if (ret) {
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
+ return ret;
+ }
+
+ return 0;
+
+out_free:
+ kfree(pmu);
+ return ret;
+}
+
+static struct platform_driver pmu_sbi_driver = {
+ .probe = pmu_sbi_device_probe,
+ .driver = {
+ .name = RISCV_PMU_PDEV_NAME,
+ },
+};
+
+static int __init pmu_sbi_devinit(void)
+{
+ int ret;
+ struct platform_device *pdev;
+
+ if (sbi_spec_version < sbi_mk_version(0, 3) ||
+ sbi_probe_extension(SBI_EXT_PMU) <= 0) {
+ return 0;
+ }
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
+ "perf/riscv/pmu:starting",
+ pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
+ if (ret) {
+ pr_err("CPU hotplug notifier could not be registered: %d\n",
+ ret);
+ return ret;
+ }
+
+ ret = platform_driver_register(&pmu_sbi_driver);
+ if (ret)
+ return ret;
+
+ pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0);
+ if (IS_ERR(pdev)) {
+ platform_driver_unregister(&pmu_sbi_driver);
+ return PTR_ERR(pdev);
+ }
+
+ /* Notify legacy implementation that SBI pmu is available*/
+ riscv_pmu_legacy_skip_init();
+
+ return ret;
+}
+device_initcall(pmu_sbi_devinit)
diff --git a/include/dt-bindings/clock/microchip,mpfs-clock.h b/include/dt-bindings/clock/microchip,mpfs-clock.h
new file mode 100644
index 000000000000..73f2a9324857
--- /dev/null
+++ b/include/dt-bindings/clock/microchip,mpfs-clock.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Daire McNamara,<daire.mcnamara@microchip.com>
+ * Copyright (C) 2020 Microchip Technology Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_
+#define _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_
+
+#define CLK_CPU 0
+#define CLK_AXI 1
+#define CLK_AHB 2
+
+#define CLK_ENVM 3
+#define CLK_MAC0 4
+#define CLK_MAC1 5
+#define CLK_MMC 6
+#define CLK_TIMER 7
+#define CLK_MMUART0 8
+#define CLK_MMUART1 9
+#define CLK_MMUART2 10
+#define CLK_MMUART3 11
+#define CLK_MMUART4 12
+#define CLK_SPI0 13
+#define CLK_SPI1 14
+#define CLK_I2C0 15
+#define CLK_I2C1 16
+#define CLK_CAN0 17
+#define CLK_CAN1 18
+#define CLK_USB 19
+#define CLK_RESERVED 20
+#define CLK_RTC 21
+#define CLK_QSPI 22
+#define CLK_GPIO0 23
+#define CLK_GPIO1 24
+#define CLK_GPIO2 25
+#define CLK_DDRC 26
+#define CLK_FIC0 27
+#define CLK_FIC1 28
+#define CLK_FIC2 29
+#define CLK_FIC3 30
+#define CLK_ATHENA 31
+#define CLK_CFM 32
+
+#endif /* _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index c7dce7883179..82e33137f917 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -166,6 +166,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
CPUHP_AP_PERF_ARM_ACPI_STARTING,
CPUHP_AP_PERF_ARM_STARTING,
+ CPUHP_AP_PERF_RISCV_STARTING,
CPUHP_AP_ARM_L2X0_STARTING,
CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
CPUHP_AP_ARM_ARCH_TIMER_STARTING,
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
new file mode 100644
index 000000000000..46f9b6fe306e
--- /dev/null
+++ b/include/linux/perf/riscv_pmu.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 SiFive
+ * Copyright (C) 2018 Andes Technology Corporation
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ */
+
+#ifndef _ASM_RISCV_PERF_EVENT_H
+#define _ASM_RISCV_PERF_EVENT_H
+
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/interrupt.h>
+
+#ifdef CONFIG_RISCV_PMU
+
+/*
+ * The RISCV_MAX_COUNTERS parameter should be specified.
+ */
+
+#define RISCV_MAX_COUNTERS 64
+#define RISCV_OP_UNSUPP (-EOPNOTSUPP)
+#define RISCV_PMU_PDEV_NAME "riscv-pmu"
+#define RISCV_PMU_LEGACY_PDEV_NAME "riscv-pmu-legacy"
+
+#define RISCV_PMU_STOP_FLAG_RESET 1
+
+struct cpu_hw_events {
+ /* currently enabled events */
+ int n_events;
+ /* Counter overflow interrupt */
+ int irq;
+ /* currently enabled events */
+ struct perf_event *events[RISCV_MAX_COUNTERS];
+ /* currently enabled hardware counters */
+ DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS);
+ /* currently enabled firmware counters */
+ DECLARE_BITMAP(used_fw_ctrs, RISCV_MAX_COUNTERS);
+};
+
+struct riscv_pmu {
+ struct pmu pmu;
+ char *name;
+
+ irqreturn_t (*handle_irq)(int irq_num, void *dev);
+
+ int num_counters;
+ u64 (*ctr_read)(struct perf_event *event);
+ int (*ctr_get_idx)(struct perf_event *event);
+ int (*ctr_get_width)(int idx);
+ void (*ctr_clear_idx)(struct perf_event *event);
+ void (*ctr_start)(struct perf_event *event, u64 init_val);
+ void (*ctr_stop)(struct perf_event *event, unsigned long flag);
+ int (*event_map)(struct perf_event *event, u64 *config);
+
+ struct cpu_hw_events __percpu *hw_events;
+ struct hlist_node node;
+};
+
+#define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu))
+unsigned long riscv_pmu_ctr_read_csr(unsigned long csr);
+int riscv_pmu_event_set_period(struct perf_event *event);
+uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event);
+u64 riscv_pmu_event_update(struct perf_event *event);
+#ifdef CONFIG_RISCV_PMU_LEGACY
+void riscv_pmu_legacy_skip_init(void);
+#else
+static inline void riscv_pmu_legacy_skip_init(void) {};
+#endif
+struct riscv_pmu *riscv_pmu_alloc(void);
+
+#endif /* CONFIG_RISCV_PMU */
+
+#endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index da23c22d5882..ef29bc16f358 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -207,6 +207,29 @@ unsigned int yield_mod_cnt, nr_abort;
"addiu " INJECT_ASM_REG ", -1\n\t" \
"bnez " INJECT_ASM_REG ", 222b\n\t" \
"333:\n\t"
+#elif defined(__riscv)
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "t1"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "beqz " INJECT_ASM_REG ", 333f\n\t" \
+ "222:\n\t" \
+ "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
+ "bnez " INJECT_ASM_REG ", 222b\n\t" \
+ "333:\n\t"
+
#else
#error unsupported target
diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h
new file mode 100644
index 000000000000..b86642f90d7f
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-riscv.h
@@ -0,0 +1,677 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Select the instruction "csrw mhartid, x0" as the RSEQ_SIG. Unlike
+ * other architectures, the ebreak instruction has no immediate field for
+ * distinguishing purposes. Hence, ebreak is not suitable as RSEQ_SIG.
+ * "csrw mhartid, x0" can also satisfy the RSEQ requirement because it
+ * is an uncommon instruction and will raise an illegal instruction
+ * exception when executed in all modes.
+ */
+#include <endian.h>
+
+#if defined(__BYTE_ORDER) ? (__BYTE_ORDER == __LITTLE_ENDIAN) : defined(__LITTLE_ENDIAN)
+#define RSEQ_SIG 0xf1401073 /* csrr mhartid, x0 */
+#else
+#error "Currently, RSEQ only supports Little-Endian version"
+#endif
+
+#if __riscv_xlen == 64
+#define __REG_SEL(a, b) a
+#elif __riscv_xlen == 32
+#define __REG_SEL(a, b) b
+#endif
+
+#define REG_L __REG_SEL("ld ", "lw ")
+#define REG_S __REG_SEL("sd ", "sw ")
+
+#define RISCV_FENCE(p, s) \
+ __asm__ __volatile__ ("fence " #p "," #s : : : "memory")
+#define rseq_smp_mb() RISCV_FENCE(rw, rw)
+#define rseq_smp_rmb() RISCV_FENCE(r, r)
+#define rseq_smp_wmb() RISCV_FENCE(w, w)
+#define RSEQ_ASM_TMP_REG_1 "t6"
+#define RSEQ_ASM_TMP_REG_2 "t5"
+#define RSEQ_ASM_TMP_REG_3 "t4"
+#define RSEQ_ASM_TMP_REG_4 "t3"
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \
+ RISCV_FENCE(r, rw) \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ RISCV_FENCE(rw, w); \
+ RSEQ_WRITE_ONCE(*(p), v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
+ post_commit_offset, abort_ip) \
+ ".pushsection __rseq_cs, \"aw\"\n" \
+ ".balign 32\n" \
+ __rseq_str(label) ":\n" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n" \
+ ".quad " __rseq_str(start_ip) ", " \
+ __rseq_str(post_commit_offset) ", " \
+ __rseq_str(abort_ip) "\n" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n" \
+ ".quad " __rseq_str(label) "b\n" \
+ ".popsection\n"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ ((post_commit_ip) - (start_ip)), abort_ip)
+
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n" \
+ ".popsection\n"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "la "RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \
+ REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \
+ __rseq_str(label) ":\n"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \
+ "j 222f\n" \
+ ".balign 4\n" \
+ ".long " __rseq_str(RSEQ_SIG) "\n" \
+ __rseq_str(label) ":\n" \
+ "j %l[" __rseq_str(abort_label) "]\n" \
+ "222:\n"
+
+#define RSEQ_ASM_OP_STORE(value, var) \
+ REG_S "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
+ REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \
+ "lw "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPNE(var, expect, label) \
+ REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ "beq "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ __rseq_str(label) "\n"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label)
+
+#define RSEQ_ASM_OP_R_LOAD(var) \
+ REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_STORE(var) \
+ REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \
+ "add "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \
+ RSEQ_ASM_TMP_REG_1 "\n" \
+ REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n"
+
+#define RSEQ_ASM_OP_R_ADD(count) \
+ "add "RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \
+ ", %[" __rseq_str(count) "]\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
+ RSEQ_ASM_OP_STORE(value, var) \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label) \
+ "fence rw, w\n" \
+ RSEQ_ASM_OP_STORE(value, var) \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
+ REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) \
+ "beqz %[" __rseq_str(len) "], 333f\n" \
+ "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(len) "]\n" \
+ "mv " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(src) "]\n" \
+ "mv " RSEQ_ASM_TMP_REG_3 ", %[" __rseq_str(dst) "]\n" \
+ "222:\n" \
+ "lb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_2 ")\n" \
+ "sb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_3 ")\n" \
+ "addi " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 ", -1\n" \
+ "addi " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", 1\n" \
+ "addi " RSEQ_ASM_TMP_REG_3 ", " RSEQ_ASM_TMP_REG_3 ", 1\n" \
+ "bnez " RSEQ_ASM_TMP_REG_1 ", 222b\n" \
+ "333:\n"
+
+#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label) \
+ "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n" \
+ RSEQ_ASM_OP_R_ADD(off) \
+ REG_L RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \
+ RSEQ_ASM_OP_R_ADD(inc) \
+ __rseq_str(post_commit_label) ":\n"
+
+static inline __always_inline
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_STORE(load)
+ RSEQ_ASM_OP_R_LOAD_OFF(voffp)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [load] "m" (*load),
+ [voffp] "r" (voffp)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_ADD(count)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "m" (*v),
+ [newv] "r" (newv),
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "m" (*v),
+ [newv] "r" (newv),
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[error3]")
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "m" (*v),
+ [newv] "r" (newv),
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2,
+ RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "m" (*v),
+ [newv] "r" (newv),
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2,
+ RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ * pval = *(ptr+off)
+ * *pval += inc;
+ */
+static inline __always_inline
+int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+#endif
+ RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [ptr] "r" (ptr),
+ [off] "er" (off),
+ [inc] "er" (inc)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 9d850b290c2e..6f7513384bf5 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -93,6 +93,8 @@ static inline struct rseq_abi *rseq_get_abi(void)
#include <rseq-mips.h>
#elif defined(__s390__)
#include <rseq-s390.h>
+#elif defined(__riscv)
+#include <rseq-riscv.h>
#else
#error unsupported target
#endif